• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * jsimd_i386.c
3  *
4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5  * Copyright 2009-2011, 2013-2014 D. R. Commander
6  *
7  * Based on the x86 SIMD extension for IJG JPEG library,
8  * Copyright (C) 1999-2006, MIYASAKA Masaru.
9  * For conditions of distribution and use, see copyright notice in jsimdext.inc
10  *
11  * This file contains the interface between the "normal" portions
12  * of the library and the SIMD implementations when running on a
13  * 32-bit x86 architecture.
14  */
15 
16 #define JPEG_INTERNALS
17 #include "../jinclude.h"
18 #include "../jpeglib.h"
19 #include "../jsimd.h"
20 #include "../jdct.h"
21 #include "../jsimddct.h"
22 #include "jsimd.h"
23 
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the low `order` bits of ptr are
 * all zero.  Both arguments are fully parenthesised so that arbitrary
 * expressions (e.g. "a | b" or "cond ? x : y") expand correctly.
 */
#define IS_ALIGNED(ptr, order) \
  ((((unsigned)(ptr)) & ((1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/*
 * Bitmask of JSIMD_* capability flags.  The all-ones value means that
 * init_simd() has not filled it in yet.
 */
static unsigned int simd_support = ~0U;
33 
34 /*
35  * Check what SIMD accelerations are supported.
36  *
37  * FIXME: This code is racy under a multi-threaded environment.
38  */
39 LOCAL(void)
init_simd(void)40 init_simd (void)
41 {
42   char *env = NULL;
43 
44   if (simd_support != ~0U)
45     return;
46 
47   simd_support = jpeg_simd_cpu_support();
48 
49   /* Force different settings through environment variables */
50   env = getenv("JSIMD_FORCEMMX");
51   if ((env != NULL) && (strcmp(env, "1") == 0))
52     simd_support &= JSIMD_MMX;
53   env = getenv("JSIMD_FORCE3DNOW");
54   if ((env != NULL) && (strcmp(env, "1") == 0))
55     simd_support &= JSIMD_3DNOW|JSIMD_MMX;
56   env = getenv("JSIMD_FORCESSE");
57   if ((env != NULL) && (strcmp(env, "1") == 0))
58     simd_support &= JSIMD_SSE|JSIMD_MMX;
59   env = getenv("JSIMD_FORCESSE2");
60   if ((env != NULL) && (strcmp(env, "1") == 0))
61     simd_support &= JSIMD_SSE2;
62   env = getenv("JSIMD_FORCENONE");
63   if ((env != NULL) && (strcmp(env, "1") == 0))
64     simd_support = 0;
65 }
66 
67 GLOBAL(int)
jsimd_can_rgb_ycc(void)68 jsimd_can_rgb_ycc (void)
69 {
70   init_simd();
71 
72   /* The code is optimised for these values only */
73   if (BITS_IN_JSAMPLE != 8)
74     return 0;
75   if (sizeof(JDIMENSION) != 4)
76     return 0;
77   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
78     return 0;
79 
80   if ((simd_support & JSIMD_SSE2) &&
81       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
82     return 1;
83   if (simd_support & JSIMD_MMX)
84     return 1;
85 
86   return 0;
87 }
88 
89 GLOBAL(int)
jsimd_can_rgb_gray(void)90 jsimd_can_rgb_gray (void)
91 {
92   init_simd();
93 
94   /* The code is optimised for these values only */
95   if (BITS_IN_JSAMPLE != 8)
96     return 0;
97   if (sizeof(JDIMENSION) != 4)
98     return 0;
99   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
100     return 0;
101 
102   if ((simd_support & JSIMD_SSE2) &&
103       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
104     return 1;
105   if (simd_support & JSIMD_MMX)
106     return 1;
107 
108   return 0;
109 }
110 
111 GLOBAL(int)
jsimd_can_ycc_rgb(void)112 jsimd_can_ycc_rgb (void)
113 {
114   init_simd();
115 
116   /* The code is optimised for these values only */
117   if (BITS_IN_JSAMPLE != 8)
118     return 0;
119   if (sizeof(JDIMENSION) != 4)
120     return 0;
121   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
122     return 0;
123 
124   if ((simd_support & JSIMD_SSE2) &&
125       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
126     return 1;
127   if (simd_support & JSIMD_MMX)
128     return 1;
129 
130   return 0;
131 }
132 
133 GLOBAL(int)
jsimd_can_ycc_rgb565(void)134 jsimd_can_ycc_rgb565 (void)
135 {
136   return 0;
137 }
138 
139 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)140 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
141                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
142                        JDIMENSION output_row, int num_rows)
143 {
144   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
145   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146 
147   switch(cinfo->in_color_space) {
148     case JCS_EXT_RGB:
149       sse2fct=jsimd_extrgb_ycc_convert_sse2;
150       mmxfct=jsimd_extrgb_ycc_convert_mmx;
151       break;
152     case JCS_EXT_RGBX:
153     case JCS_EXT_RGBA:
154       sse2fct=jsimd_extrgbx_ycc_convert_sse2;
155       mmxfct=jsimd_extrgbx_ycc_convert_mmx;
156       break;
157     case JCS_EXT_BGR:
158       sse2fct=jsimd_extbgr_ycc_convert_sse2;
159       mmxfct=jsimd_extbgr_ycc_convert_mmx;
160       break;
161     case JCS_EXT_BGRX:
162     case JCS_EXT_BGRA:
163       sse2fct=jsimd_extbgrx_ycc_convert_sse2;
164       mmxfct=jsimd_extbgrx_ycc_convert_mmx;
165       break;
166     case JCS_EXT_XBGR:
167     case JCS_EXT_ABGR:
168       sse2fct=jsimd_extxbgr_ycc_convert_sse2;
169       mmxfct=jsimd_extxbgr_ycc_convert_mmx;
170       break;
171     case JCS_EXT_XRGB:
172     case JCS_EXT_ARGB:
173       sse2fct=jsimd_extxrgb_ycc_convert_sse2;
174       mmxfct=jsimd_extxrgb_ycc_convert_mmx;
175       break;
176     default:
177       sse2fct=jsimd_rgb_ycc_convert_sse2;
178       mmxfct=jsimd_rgb_ycc_convert_mmx;
179       break;
180   }
181 
182   if ((simd_support & JSIMD_SSE2) &&
183       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
184     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
185   else if (simd_support & JSIMD_MMX)
186     mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
187 }
188 
189 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)190 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
191                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
192                         JDIMENSION output_row, int num_rows)
193 {
194   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
195   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
196 
197   switch(cinfo->in_color_space) {
198     case JCS_EXT_RGB:
199       sse2fct=jsimd_extrgb_gray_convert_sse2;
200       mmxfct=jsimd_extrgb_gray_convert_mmx;
201       break;
202     case JCS_EXT_RGBX:
203     case JCS_EXT_RGBA:
204       sse2fct=jsimd_extrgbx_gray_convert_sse2;
205       mmxfct=jsimd_extrgbx_gray_convert_mmx;
206       break;
207     case JCS_EXT_BGR:
208       sse2fct=jsimd_extbgr_gray_convert_sse2;
209       mmxfct=jsimd_extbgr_gray_convert_mmx;
210       break;
211     case JCS_EXT_BGRX:
212     case JCS_EXT_BGRA:
213       sse2fct=jsimd_extbgrx_gray_convert_sse2;
214       mmxfct=jsimd_extbgrx_gray_convert_mmx;
215       break;
216     case JCS_EXT_XBGR:
217     case JCS_EXT_ABGR:
218       sse2fct=jsimd_extxbgr_gray_convert_sse2;
219       mmxfct=jsimd_extxbgr_gray_convert_mmx;
220       break;
221     case JCS_EXT_XRGB:
222     case JCS_EXT_ARGB:
223       sse2fct=jsimd_extxrgb_gray_convert_sse2;
224       mmxfct=jsimd_extxrgb_gray_convert_mmx;
225       break;
226     default:
227       sse2fct=jsimd_rgb_gray_convert_sse2;
228       mmxfct=jsimd_rgb_gray_convert_mmx;
229       break;
230   }
231 
232   if ((simd_support & JSIMD_SSE2) &&
233       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
234     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
235   else if (simd_support & JSIMD_MMX)
236     mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
237 }
238 
239 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)240 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
241                        JSAMPIMAGE input_buf, JDIMENSION input_row,
242                        JSAMPARRAY output_buf, int num_rows)
243 {
244   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
245   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
246 
247   switch(cinfo->out_color_space) {
248     case JCS_EXT_RGB:
249       sse2fct=jsimd_ycc_extrgb_convert_sse2;
250       mmxfct=jsimd_ycc_extrgb_convert_mmx;
251       break;
252     case JCS_EXT_RGBX:
253     case JCS_EXT_RGBA:
254       sse2fct=jsimd_ycc_extrgbx_convert_sse2;
255       mmxfct=jsimd_ycc_extrgbx_convert_mmx;
256       break;
257     case JCS_EXT_BGR:
258       sse2fct=jsimd_ycc_extbgr_convert_sse2;
259       mmxfct=jsimd_ycc_extbgr_convert_mmx;
260       break;
261     case JCS_EXT_BGRX:
262     case JCS_EXT_BGRA:
263       sse2fct=jsimd_ycc_extbgrx_convert_sse2;
264       mmxfct=jsimd_ycc_extbgrx_convert_mmx;
265       break;
266     case JCS_EXT_XBGR:
267     case JCS_EXT_ABGR:
268       sse2fct=jsimd_ycc_extxbgr_convert_sse2;
269       mmxfct=jsimd_ycc_extxbgr_convert_mmx;
270       break;
271     case JCS_EXT_XRGB:
272     case JCS_EXT_ARGB:
273       sse2fct=jsimd_ycc_extxrgb_convert_sse2;
274       mmxfct=jsimd_ycc_extxrgb_convert_mmx;
275       break;
276     default:
277       sse2fct=jsimd_ycc_rgb_convert_sse2;
278       mmxfct=jsimd_ycc_rgb_convert_mmx;
279       break;
280   }
281 
282   if ((simd_support & JSIMD_SSE2) &&
283       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
284     sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
285   else if (simd_support & JSIMD_MMX)
286     mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
287 }
288 
289 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)290 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
291                           JSAMPIMAGE input_buf, JDIMENSION input_row,
292                           JSAMPARRAY output_buf, int num_rows)
293 {
294 }
295 
296 GLOBAL(int)
jsimd_can_h2v2_downsample(void)297 jsimd_can_h2v2_downsample (void)
298 {
299   init_simd();
300 
301   /* The code is optimised for these values only */
302   if (BITS_IN_JSAMPLE != 8)
303     return 0;
304   if (sizeof(JDIMENSION) != 4)
305     return 0;
306 
307   if (simd_support & JSIMD_SSE2)
308     return 1;
309   if (simd_support & JSIMD_MMX)
310     return 1;
311 
312   return 0;
313 }
314 
315 GLOBAL(int)
jsimd_can_h2v1_downsample(void)316 jsimd_can_h2v1_downsample (void)
317 {
318   init_simd();
319 
320   /* The code is optimised for these values only */
321   if (BITS_IN_JSAMPLE != 8)
322     return 0;
323   if (sizeof(JDIMENSION) != 4)
324     return 0;
325 
326   if (simd_support & JSIMD_SSE2)
327     return 1;
328   if (simd_support & JSIMD_MMX)
329     return 1;
330 
331   return 0;
332 }
333 
334 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)335 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
336                        JSAMPARRAY input_data, JSAMPARRAY output_data)
337 {
338   if (simd_support & JSIMD_SSE2)
339     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
340                                compptr->v_samp_factor,
341                                compptr->width_in_blocks, input_data,
342                                output_data);
343   else if (simd_support & JSIMD_MMX)
344     jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
345                               compptr->v_samp_factor, compptr->width_in_blocks,
346                               input_data, output_data);
347 }
348 
349 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)350 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
351                        JSAMPARRAY input_data, JSAMPARRAY output_data)
352 {
353   if (simd_support & JSIMD_SSE2)
354     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
355                                compptr->v_samp_factor,
356                                compptr->width_in_blocks, input_data,
357                                output_data);
358   else if (simd_support & JSIMD_MMX)
359     jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
360                               compptr->v_samp_factor, compptr->width_in_blocks,
361                               input_data, output_data);
362 }
363 
364 GLOBAL(int)
jsimd_can_h2v2_upsample(void)365 jsimd_can_h2v2_upsample (void)
366 {
367   init_simd();
368 
369   /* The code is optimised for these values only */
370   if (BITS_IN_JSAMPLE != 8)
371     return 0;
372   if (sizeof(JDIMENSION) != 4)
373     return 0;
374 
375   if (simd_support & JSIMD_SSE2)
376     return 1;
377   if (simd_support & JSIMD_MMX)
378     return 1;
379 
380   return 0;
381 }
382 
383 GLOBAL(int)
jsimd_can_h2v1_upsample(void)384 jsimd_can_h2v1_upsample (void)
385 {
386   init_simd();
387 
388   /* The code is optimised for these values only */
389   if (BITS_IN_JSAMPLE != 8)
390     return 0;
391   if (sizeof(JDIMENSION) != 4)
392     return 0;
393 
394   if (simd_support & JSIMD_SSE2)
395     return 1;
396   if (simd_support & JSIMD_MMX)
397     return 1;
398 
399   return 0;
400 }
401 
402 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)403 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
404                      jpeg_component_info * compptr,
405                      JSAMPARRAY input_data,
406                      JSAMPARRAY * output_data_ptr)
407 {
408   if (simd_support & JSIMD_SSE2)
409     jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
410                              input_data, output_data_ptr);
411   else if (simd_support & JSIMD_MMX)
412     jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
413                             input_data, output_data_ptr);
414 }
415 
416 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)417 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
418                      jpeg_component_info * compptr,
419                      JSAMPARRAY input_data,
420                      JSAMPARRAY * output_data_ptr)
421 {
422   if (simd_support & JSIMD_SSE2)
423     jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
424                              input_data, output_data_ptr);
425   else if (simd_support & JSIMD_MMX)
426     jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
427                             input_data, output_data_ptr);
428 }
429 
430 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)431 jsimd_can_h2v2_fancy_upsample (void)
432 {
433   init_simd();
434 
435   /* The code is optimised for these values only */
436   if (BITS_IN_JSAMPLE != 8)
437     return 0;
438   if (sizeof(JDIMENSION) != 4)
439     return 0;
440 
441   if ((simd_support & JSIMD_SSE2) &&
442       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
443     return 1;
444   if (simd_support & JSIMD_MMX)
445     return 1;
446 
447   return 0;
448 }
449 
450 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)451 jsimd_can_h2v1_fancy_upsample (void)
452 {
453   init_simd();
454 
455   /* The code is optimised for these values only */
456   if (BITS_IN_JSAMPLE != 8)
457     return 0;
458   if (sizeof(JDIMENSION) != 4)
459     return 0;
460 
461   if ((simd_support & JSIMD_SSE2) &&
462       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
463     return 1;
464   if (simd_support & JSIMD_MMX)
465     return 1;
466 
467   return 0;
468 }
469 
470 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)471 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
472                            jpeg_component_info * compptr,
473                            JSAMPARRAY input_data,
474                            JSAMPARRAY * output_data_ptr)
475 {
476   if ((simd_support & JSIMD_SSE2) &&
477       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
478     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
479                                    compptr->downsampled_width, input_data,
480                                    output_data_ptr);
481   else if (simd_support & JSIMD_MMX)
482     jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
483                                   compptr->downsampled_width, input_data,
484                                   output_data_ptr);
485 }
486 
487 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)488 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
489                            jpeg_component_info * compptr,
490                            JSAMPARRAY input_data,
491                            JSAMPARRAY * output_data_ptr)
492 {
493   if ((simd_support & JSIMD_SSE2) &&
494       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
495     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
496                                    compptr->downsampled_width, input_data,
497                                    output_data_ptr);
498   else if (simd_support & JSIMD_MMX)
499     jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
500                                   compptr->downsampled_width, input_data,
501                                   output_data_ptr);
502 }
503 
504 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)505 jsimd_can_h2v2_merged_upsample (void)
506 {
507   init_simd();
508 
509   /* The code is optimised for these values only */
510   if (BITS_IN_JSAMPLE != 8)
511     return 0;
512   if (sizeof(JDIMENSION) != 4)
513     return 0;
514 
515   if ((simd_support & JSIMD_SSE2) &&
516       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
517     return 1;
518   if (simd_support & JSIMD_MMX)
519     return 1;
520 
521   return 0;
522 }
523 
524 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)525 jsimd_can_h2v1_merged_upsample (void)
526 {
527   init_simd();
528 
529   /* The code is optimised for these values only */
530   if (BITS_IN_JSAMPLE != 8)
531     return 0;
532   if (sizeof(JDIMENSION) != 4)
533     return 0;
534 
535   if ((simd_support & JSIMD_SSE2) &&
536       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
537     return 1;
538   if (simd_support & JSIMD_MMX)
539     return 1;
540 
541   return 0;
542 }
543 
544 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)545 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
546                             JSAMPIMAGE input_buf,
547                             JDIMENSION in_row_group_ctr,
548                             JSAMPARRAY output_buf)
549 {
550   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
551   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
552 
553   switch(cinfo->out_color_space) {
554     case JCS_EXT_RGB:
555       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
556       mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
557       break;
558     case JCS_EXT_RGBX:
559     case JCS_EXT_RGBA:
560       sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
561       mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
562       break;
563     case JCS_EXT_BGR:
564       sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
565       mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
566       break;
567     case JCS_EXT_BGRX:
568     case JCS_EXT_BGRA:
569       sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
570       mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
571       break;
572     case JCS_EXT_XBGR:
573     case JCS_EXT_ABGR:
574       sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
575       mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
576       break;
577     case JCS_EXT_XRGB:
578     case JCS_EXT_ARGB:
579       sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
580       mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
581       break;
582     default:
583       sse2fct=jsimd_h2v2_merged_upsample_sse2;
584       mmxfct=jsimd_h2v2_merged_upsample_mmx;
585       break;
586   }
587 
588   if ((simd_support & JSIMD_SSE2) &&
589       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
590     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
591   else if (simd_support & JSIMD_MMX)
592     mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
593 }
594 
595 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)596 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
597                             JSAMPIMAGE input_buf,
598                             JDIMENSION in_row_group_ctr,
599                             JSAMPARRAY output_buf)
600 {
601   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
602   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
603 
604   switch(cinfo->out_color_space) {
605     case JCS_EXT_RGB:
606       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
607       mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
608       break;
609     case JCS_EXT_RGBX:
610     case JCS_EXT_RGBA:
611       sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
612       mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
613       break;
614     case JCS_EXT_BGR:
615       sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
616       mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
617       break;
618     case JCS_EXT_BGRX:
619     case JCS_EXT_BGRA:
620       sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
621       mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
622       break;
623     case JCS_EXT_XBGR:
624     case JCS_EXT_ABGR:
625       sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
626       mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
627       break;
628     case JCS_EXT_XRGB:
629     case JCS_EXT_ARGB:
630       sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
631       mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
632       break;
633     default:
634       sse2fct=jsimd_h2v1_merged_upsample_sse2;
635       mmxfct=jsimd_h2v1_merged_upsample_mmx;
636       break;
637   }
638 
639   if ((simd_support & JSIMD_SSE2) &&
640       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
641     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
642   else if (simd_support & JSIMD_MMX)
643     mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
644 }
645 
646 GLOBAL(int)
jsimd_can_convsamp(void)647 jsimd_can_convsamp (void)
648 {
649   init_simd();
650 
651   /* The code is optimised for these values only */
652   if (DCTSIZE != 8)
653     return 0;
654   if (BITS_IN_JSAMPLE != 8)
655     return 0;
656   if (sizeof(JDIMENSION) != 4)
657     return 0;
658   if (sizeof(DCTELEM) != 2)
659     return 0;
660 
661   if (simd_support & JSIMD_SSE2)
662     return 1;
663   if (simd_support & JSIMD_MMX)
664     return 1;
665 
666   return 0;
667 }
668 
669 GLOBAL(int)
jsimd_can_convsamp_float(void)670 jsimd_can_convsamp_float (void)
671 {
672   init_simd();
673 
674   /* The code is optimised for these values only */
675   if (DCTSIZE != 8)
676     return 0;
677   if (BITS_IN_JSAMPLE != 8)
678     return 0;
679   if (sizeof(JDIMENSION) != 4)
680     return 0;
681   if (sizeof(FAST_FLOAT) != 4)
682     return 0;
683 
684   if (simd_support & JSIMD_SSE2)
685     return 1;
686   if (simd_support & JSIMD_SSE)
687     return 1;
688   if (simd_support & JSIMD_3DNOW)
689     return 1;
690 
691   return 0;
692 }
693 
694 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)695 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
696                 DCTELEM * workspace)
697 {
698   if (simd_support & JSIMD_SSE2)
699     jsimd_convsamp_sse2(sample_data, start_col, workspace);
700   else if (simd_support & JSIMD_MMX)
701     jsimd_convsamp_mmx(sample_data, start_col, workspace);
702 }
703 
704 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)705 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
706                       FAST_FLOAT * workspace)
707 {
708   if (simd_support & JSIMD_SSE2)
709     jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
710   else if (simd_support & JSIMD_SSE)
711     jsimd_convsamp_float_sse(sample_data, start_col, workspace);
712   else if (simd_support & JSIMD_3DNOW)
713     jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
714 }
715 
716 GLOBAL(int)
jsimd_can_fdct_islow(void)717 jsimd_can_fdct_islow (void)
718 {
719   init_simd();
720 
721   /* The code is optimised for these values only */
722   if (DCTSIZE != 8)
723     return 0;
724   if (sizeof(DCTELEM) != 2)
725     return 0;
726 
727   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
728     return 1;
729   if (simd_support & JSIMD_MMX)
730     return 1;
731 
732   return 0;
733 }
734 
735 GLOBAL(int)
jsimd_can_fdct_ifast(void)736 jsimd_can_fdct_ifast (void)
737 {
738   init_simd();
739 
740   /* The code is optimised for these values only */
741   if (DCTSIZE != 8)
742     return 0;
743   if (sizeof(DCTELEM) != 2)
744     return 0;
745 
746   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
747     return 1;
748   if (simd_support & JSIMD_MMX)
749     return 1;
750 
751   return 0;
752 }
753 
754 GLOBAL(int)
jsimd_can_fdct_float(void)755 jsimd_can_fdct_float (void)
756 {
757   init_simd();
758 
759   /* The code is optimised for these values only */
760   if (DCTSIZE != 8)
761     return 0;
762   if (sizeof(FAST_FLOAT) != 4)
763     return 0;
764 
765   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
766     return 1;
767   if (simd_support & JSIMD_3DNOW)
768     return 1;
769 
770   return 0;
771 }
772 
773 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)774 jsimd_fdct_islow (DCTELEM * data)
775 {
776   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
777     jsimd_fdct_islow_sse2(data);
778   else if (simd_support & JSIMD_MMX)
779     jsimd_fdct_islow_mmx(data);
780 }
781 
782 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)783 jsimd_fdct_ifast (DCTELEM * data)
784 {
785   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
786     jsimd_fdct_ifast_sse2(data);
787   else if (simd_support & JSIMD_MMX)
788     jsimd_fdct_ifast_mmx(data);
789 }
790 
791 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)792 jsimd_fdct_float (FAST_FLOAT * data)
793 {
794   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
795     jsimd_fdct_float_sse(data);
796   else if (simd_support & JSIMD_3DNOW)
797     jsimd_fdct_float_3dnow(data);
798 }
799 
800 GLOBAL(int)
jsimd_can_quantize(void)801 jsimd_can_quantize (void)
802 {
803   init_simd();
804 
805   /* The code is optimised for these values only */
806   if (DCTSIZE != 8)
807     return 0;
808   if (sizeof(JCOEF) != 2)
809     return 0;
810   if (sizeof(DCTELEM) != 2)
811     return 0;
812 
813   if (simd_support & JSIMD_SSE2)
814     return 1;
815   if (simd_support & JSIMD_MMX)
816     return 1;
817 
818   return 0;
819 }
820 
821 GLOBAL(int)
jsimd_can_quantize_float(void)822 jsimd_can_quantize_float (void)
823 {
824   init_simd();
825 
826   /* The code is optimised for these values only */
827   if (DCTSIZE != 8)
828     return 0;
829   if (sizeof(JCOEF) != 2)
830     return 0;
831   if (sizeof(FAST_FLOAT) != 4)
832     return 0;
833 
834   if (simd_support & JSIMD_SSE2)
835     return 1;
836   if (simd_support & JSIMD_SSE)
837     return 1;
838   if (simd_support & JSIMD_3DNOW)
839     return 1;
840 
841   return 0;
842 }
843 
844 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)845 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
846                 DCTELEM * workspace)
847 {
848   if (simd_support & JSIMD_SSE2)
849     jsimd_quantize_sse2(coef_block, divisors, workspace);
850   else if (simd_support & JSIMD_MMX)
851     jsimd_quantize_mmx(coef_block, divisors, workspace);
852 }
853 
854 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)855 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
856                       FAST_FLOAT * workspace)
857 {
858   if (simd_support & JSIMD_SSE2)
859     jsimd_quantize_float_sse2(coef_block, divisors, workspace);
860   else if (simd_support & JSIMD_SSE)
861     jsimd_quantize_float_sse(coef_block, divisors, workspace);
862   else if (simd_support & JSIMD_3DNOW)
863     jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
864 }
865 
866 GLOBAL(int)
jsimd_can_idct_2x2(void)867 jsimd_can_idct_2x2 (void)
868 {
869   init_simd();
870 
871   /* The code is optimised for these values only */
872   if (DCTSIZE != 8)
873     return 0;
874   if (sizeof(JCOEF) != 2)
875     return 0;
876   if (BITS_IN_JSAMPLE != 8)
877     return 0;
878   if (sizeof(JDIMENSION) != 4)
879     return 0;
880   if (sizeof(ISLOW_MULT_TYPE) != 2)
881     return 0;
882 
883   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
884     return 1;
885   if (simd_support & JSIMD_MMX)
886     return 1;
887 
888   return 0;
889 }
890 
891 GLOBAL(int)
jsimd_can_idct_4x4(void)892 jsimd_can_idct_4x4 (void)
893 {
894   init_simd();
895 
896   /* The code is optimised for these values only */
897   if (DCTSIZE != 8)
898     return 0;
899   if (sizeof(JCOEF) != 2)
900     return 0;
901   if (BITS_IN_JSAMPLE != 8)
902     return 0;
903   if (sizeof(JDIMENSION) != 4)
904     return 0;
905   if (sizeof(ISLOW_MULT_TYPE) != 2)
906     return 0;
907 
908   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
909     return 1;
910   if (simd_support & JSIMD_MMX)
911     return 1;
912 
913   return 0;
914 }
915 
916 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)917 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
918                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
919                 JDIMENSION output_col)
920 {
921   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
922     jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
923                         output_col);
924   else if (simd_support & JSIMD_MMX)
925     jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
926 }
927 
928 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)929 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
930                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
931                 JDIMENSION output_col)
932 {
933   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
934     jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
935                         output_col);
936   else if (simd_support & JSIMD_MMX)
937     jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
938 }
939 
940 GLOBAL(int)
jsimd_can_idct_islow(void)941 jsimd_can_idct_islow (void)
942 {
943   init_simd();
944 
945   /* The code is optimised for these values only */
946   if (DCTSIZE != 8)
947     return 0;
948   if (sizeof(JCOEF) != 2)
949     return 0;
950   if (BITS_IN_JSAMPLE != 8)
951     return 0;
952   if (sizeof(JDIMENSION) != 4)
953     return 0;
954   if (sizeof(ISLOW_MULT_TYPE) != 2)
955     return 0;
956 
957   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
958     return 1;
959   if (simd_support & JSIMD_MMX)
960     return 1;
961 
962   return 0;
963 }
964 
965 GLOBAL(int)
jsimd_can_idct_ifast(void)966 jsimd_can_idct_ifast (void)
967 {
968   init_simd();
969 
970   /* The code is optimised for these values only */
971   if (DCTSIZE != 8)
972     return 0;
973   if (sizeof(JCOEF) != 2)
974     return 0;
975   if (BITS_IN_JSAMPLE != 8)
976     return 0;
977   if (sizeof(JDIMENSION) != 4)
978     return 0;
979   if (sizeof(IFAST_MULT_TYPE) != 2)
980     return 0;
981   if (IFAST_SCALE_BITS != 2)
982     return 0;
983 
984   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
985     return 1;
986   if (simd_support & JSIMD_MMX)
987     return 1;
988 
989   return 0;
990 }
991 
992 GLOBAL(int)
jsimd_can_idct_float(void)993 jsimd_can_idct_float (void)
994 {
995   init_simd();
996 
997   if (DCTSIZE != 8)
998     return 0;
999   if (sizeof(JCOEF) != 2)
1000     return 0;
1001   if (BITS_IN_JSAMPLE != 8)
1002     return 0;
1003   if (sizeof(JDIMENSION) != 4)
1004     return 0;
1005   if (sizeof(FAST_FLOAT) != 4)
1006     return 0;
1007   if (sizeof(FLOAT_MULT_TYPE) != 4)
1008     return 0;
1009 
1010   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1011     return 1;
1012   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1013     return 1;
1014   if (simd_support & JSIMD_3DNOW)
1015     return 1;
1016 
1017   return 0;
1018 }
1019 
1020 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1021 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1022                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
1023                   JDIMENSION output_col)
1024 {
1025   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1026     jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1027                           output_col);
1028   else if (simd_support & JSIMD_MMX)
1029     jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
1030                          output_col);
1031 }
1032 
1033 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1034 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1035                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
1036                   JDIMENSION output_col)
1037 {
1038   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1039     jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1040                           output_col);
1041   else if (simd_support & JSIMD_MMX)
1042     jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
1043                          output_col);
1044 }
1045 
1046 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1047 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1048                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
1049                   JDIMENSION output_col)
1050 {
1051   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1052     jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1053                           output_col);
1054   else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1055     jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1056                          output_col);
1057   else if (simd_support & JSIMD_3DNOW)
1058     jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
1059                            output_col);
1060 }
1061 
1062