1 /*
2 * jsimd_i386.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
7 *
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11 *
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 32-bit x86 architecture.
15 */
16
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
23 #include "../jsimd.h"
24 #include "jconfigint.h"
25
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the low `order` bits of ptr are all
 * zero, i.e. when ptr is a multiple of (1 << order) bytes.  Both arguments
 * are fully parenthesized so that arbitrary expressions (conditionals,
 * bitwise operators, ...) can be passed without precedence surprises.
 *
 * The address is converted to `unsigned` before masking.
 * NOTE(review): presumably lossless because this file targets 32-bit x86
 * (see the file header) -- confirm if it is ever built elsewhere.
 */
#define IS_ALIGNED(ptr, order) \
  ((((unsigned)(ptr)) & ((1U << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4))  /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5))  /* 32 byte alignment */
34
/* Bitmask of usable JSIMD_* extensions; all-ones means "not probed yet". */
static unsigned int simd_support = ~0U;
/* Set to 0 by init_simd() when the JSIMD_NOHUFFENC variable equals "1". */
static unsigned int simd_huffman = 1;
37
38 /*
39 * Check what SIMD accelerations are supported.
40 *
41 * FIXME: This code is racy under a multi-threaded environment.
42 */
43 LOCAL(void)
init_simd(void)44 init_simd(void)
45 {
46 #ifndef NO_GETENV
47 char *env = NULL;
48 #endif
49
50 if (simd_support != ~0U)
51 return;
52
53 simd_support = jpeg_simd_cpu_support();
54
55 #ifndef NO_GETENV
56 /* Force different settings through environment variables */
57 env = getenv("JSIMD_FORCEMMX");
58 if ((env != NULL) && (strcmp(env, "1") == 0))
59 simd_support &= JSIMD_MMX;
60 env = getenv("JSIMD_FORCE3DNOW");
61 if ((env != NULL) && (strcmp(env, "1") == 0))
62 simd_support &= JSIMD_3DNOW | JSIMD_MMX;
63 env = getenv("JSIMD_FORCESSE");
64 if ((env != NULL) && (strcmp(env, "1") == 0))
65 simd_support &= JSIMD_SSE | JSIMD_MMX;
66 env = getenv("JSIMD_FORCESSE2");
67 if ((env != NULL) && (strcmp(env, "1") == 0))
68 simd_support &= JSIMD_SSE2;
69 env = getenv("JSIMD_FORCEAVX2");
70 if ((env != NULL) && (strcmp(env, "1") == 0))
71 simd_support &= JSIMD_AVX2;
72 env = getenv("JSIMD_FORCENONE");
73 if ((env != NULL) && (strcmp(env, "1") == 0))
74 simd_support = 0;
75 env = getenv("JSIMD_NOHUFFENC");
76 if ((env != NULL) && (strcmp(env, "1") == 0))
77 simd_huffman = 0;
78 #endif
79 }
80
81 GLOBAL(int)
jsimd_can_rgb_ycc(void)82 jsimd_can_rgb_ycc(void)
83 {
84 init_simd();
85
86 /* The code is optimised for these values only */
87 if (BITS_IN_JSAMPLE != 8)
88 return 0;
89 if (sizeof(JDIMENSION) != 4)
90 return 0;
91 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
92 return 0;
93
94 if ((simd_support & JSIMD_AVX2) &&
95 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
96 return 1;
97 if ((simd_support & JSIMD_SSE2) &&
98 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
99 return 1;
100 if (simd_support & JSIMD_MMX)
101 return 1;
102
103 return 0;
104 }
105
106 GLOBAL(int)
jsimd_can_rgb_gray(void)107 jsimd_can_rgb_gray(void)
108 {
109 init_simd();
110
111 /* The code is optimised for these values only */
112 if (BITS_IN_JSAMPLE != 8)
113 return 0;
114 if (sizeof(JDIMENSION) != 4)
115 return 0;
116 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
117 return 0;
118
119 if ((simd_support & JSIMD_AVX2) &&
120 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
121 return 1;
122 if ((simd_support & JSIMD_SSE2) &&
123 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
124 return 1;
125 if (simd_support & JSIMD_MMX)
126 return 1;
127
128 return 0;
129 }
130
131 GLOBAL(int)
jsimd_can_ycc_rgb(void)132 jsimd_can_ycc_rgb(void)
133 {
134 init_simd();
135
136 /* The code is optimised for these values only */
137 if (BITS_IN_JSAMPLE != 8)
138 return 0;
139 if (sizeof(JDIMENSION) != 4)
140 return 0;
141 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
142 return 0;
143
144 if ((simd_support & JSIMD_AVX2) &&
145 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
146 return 1;
147 if ((simd_support & JSIMD_SSE2) &&
148 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
149 return 1;
150 if (simd_support & JSIMD_MMX)
151 return 1;
152
153 return 0;
154 }
155
156 GLOBAL(int)
jsimd_can_ycc_rgb565(void)157 jsimd_can_ycc_rgb565(void)
158 {
159 return 0;
160 }
161
162 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)163 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
164 JSAMPIMAGE output_buf, JDIMENSION output_row,
165 int num_rows)
166 {
167 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
168 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
169 void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
170
171 switch (cinfo->in_color_space) {
172 case JCS_EXT_RGB:
173 avx2fct = jsimd_extrgb_ycc_convert_avx2;
174 sse2fct = jsimd_extrgb_ycc_convert_sse2;
175 mmxfct = jsimd_extrgb_ycc_convert_mmx;
176 break;
177 case JCS_EXT_RGBX:
178 case JCS_EXT_RGBA:
179 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
180 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
181 mmxfct = jsimd_extrgbx_ycc_convert_mmx;
182 break;
183 case JCS_EXT_BGR:
184 avx2fct = jsimd_extbgr_ycc_convert_avx2;
185 sse2fct = jsimd_extbgr_ycc_convert_sse2;
186 mmxfct = jsimd_extbgr_ycc_convert_mmx;
187 break;
188 case JCS_EXT_BGRX:
189 case JCS_EXT_BGRA:
190 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
191 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
192 mmxfct = jsimd_extbgrx_ycc_convert_mmx;
193 break;
194 case JCS_EXT_XBGR:
195 case JCS_EXT_ABGR:
196 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
197 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
198 mmxfct = jsimd_extxbgr_ycc_convert_mmx;
199 break;
200 case JCS_EXT_XRGB:
201 case JCS_EXT_ARGB:
202 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
203 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
204 mmxfct = jsimd_extxrgb_ycc_convert_mmx;
205 break;
206 default:
207 avx2fct = jsimd_rgb_ycc_convert_avx2;
208 sse2fct = jsimd_rgb_ycc_convert_sse2;
209 mmxfct = jsimd_rgb_ycc_convert_mmx;
210 break;
211 }
212
213 if (simd_support & JSIMD_AVX2)
214 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
215 else if (simd_support & JSIMD_SSE2)
216 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
217 else
218 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
219 }
220
221 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)222 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
223 JSAMPIMAGE output_buf, JDIMENSION output_row,
224 int num_rows)
225 {
226 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
227 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
228 void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
229
230 switch (cinfo->in_color_space) {
231 case JCS_EXT_RGB:
232 avx2fct = jsimd_extrgb_gray_convert_avx2;
233 sse2fct = jsimd_extrgb_gray_convert_sse2;
234 mmxfct = jsimd_extrgb_gray_convert_mmx;
235 break;
236 case JCS_EXT_RGBX:
237 case JCS_EXT_RGBA:
238 avx2fct = jsimd_extrgbx_gray_convert_avx2;
239 sse2fct = jsimd_extrgbx_gray_convert_sse2;
240 mmxfct = jsimd_extrgbx_gray_convert_mmx;
241 break;
242 case JCS_EXT_BGR:
243 avx2fct = jsimd_extbgr_gray_convert_avx2;
244 sse2fct = jsimd_extbgr_gray_convert_sse2;
245 mmxfct = jsimd_extbgr_gray_convert_mmx;
246 break;
247 case JCS_EXT_BGRX:
248 case JCS_EXT_BGRA:
249 avx2fct = jsimd_extbgrx_gray_convert_avx2;
250 sse2fct = jsimd_extbgrx_gray_convert_sse2;
251 mmxfct = jsimd_extbgrx_gray_convert_mmx;
252 break;
253 case JCS_EXT_XBGR:
254 case JCS_EXT_ABGR:
255 avx2fct = jsimd_extxbgr_gray_convert_avx2;
256 sse2fct = jsimd_extxbgr_gray_convert_sse2;
257 mmxfct = jsimd_extxbgr_gray_convert_mmx;
258 break;
259 case JCS_EXT_XRGB:
260 case JCS_EXT_ARGB:
261 avx2fct = jsimd_extxrgb_gray_convert_avx2;
262 sse2fct = jsimd_extxrgb_gray_convert_sse2;
263 mmxfct = jsimd_extxrgb_gray_convert_mmx;
264 break;
265 default:
266 avx2fct = jsimd_rgb_gray_convert_avx2;
267 sse2fct = jsimd_rgb_gray_convert_sse2;
268 mmxfct = jsimd_rgb_gray_convert_mmx;
269 break;
270 }
271
272 if (simd_support & JSIMD_AVX2)
273 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
274 else if (simd_support & JSIMD_SSE2)
275 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
276 else
277 mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
278 }
279
280 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)281 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
282 JDIMENSION input_row, JSAMPARRAY output_buf,
283 int num_rows)
284 {
285 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
286 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
287 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
288
289 switch (cinfo->out_color_space) {
290 case JCS_EXT_RGB:
291 avx2fct = jsimd_ycc_extrgb_convert_avx2;
292 sse2fct = jsimd_ycc_extrgb_convert_sse2;
293 mmxfct = jsimd_ycc_extrgb_convert_mmx;
294 break;
295 case JCS_EXT_RGBX:
296 case JCS_EXT_RGBA:
297 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
298 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
299 mmxfct = jsimd_ycc_extrgbx_convert_mmx;
300 break;
301 case JCS_EXT_BGR:
302 avx2fct = jsimd_ycc_extbgr_convert_avx2;
303 sse2fct = jsimd_ycc_extbgr_convert_sse2;
304 mmxfct = jsimd_ycc_extbgr_convert_mmx;
305 break;
306 case JCS_EXT_BGRX:
307 case JCS_EXT_BGRA:
308 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
309 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
310 mmxfct = jsimd_ycc_extbgrx_convert_mmx;
311 break;
312 case JCS_EXT_XBGR:
313 case JCS_EXT_ABGR:
314 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
315 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
316 mmxfct = jsimd_ycc_extxbgr_convert_mmx;
317 break;
318 case JCS_EXT_XRGB:
319 case JCS_EXT_ARGB:
320 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
321 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
322 mmxfct = jsimd_ycc_extxrgb_convert_mmx;
323 break;
324 default:
325 avx2fct = jsimd_ycc_rgb_convert_avx2;
326 sse2fct = jsimd_ycc_rgb_convert_sse2;
327 mmxfct = jsimd_ycc_rgb_convert_mmx;
328 break;
329 }
330
331 if (simd_support & JSIMD_AVX2)
332 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
333 else if (simd_support & JSIMD_SSE2)
334 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
335 else
336 mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
337 }
338
339 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)340 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
341 JDIMENSION input_row, JSAMPARRAY output_buf,
342 int num_rows)
343 {
344 }
345
346 GLOBAL(int)
jsimd_can_h2v2_downsample(void)347 jsimd_can_h2v2_downsample(void)
348 {
349 init_simd();
350
351 /* The code is optimised for these values only */
352 if (BITS_IN_JSAMPLE != 8)
353 return 0;
354 if (sizeof(JDIMENSION) != 4)
355 return 0;
356
357 if (simd_support & JSIMD_AVX2)
358 return 1;
359 if (simd_support & JSIMD_SSE2)
360 return 1;
361 if (simd_support & JSIMD_MMX)
362 return 1;
363
364 return 0;
365 }
366
367 GLOBAL(int)
jsimd_can_h2v1_downsample(void)368 jsimd_can_h2v1_downsample(void)
369 {
370 init_simd();
371
372 /* The code is optimised for these values only */
373 if (BITS_IN_JSAMPLE != 8)
374 return 0;
375 if (sizeof(JDIMENSION) != 4)
376 return 0;
377
378 if (simd_support & JSIMD_AVX2)
379 return 1;
380 if (simd_support & JSIMD_SSE2)
381 return 1;
382 if (simd_support & JSIMD_MMX)
383 return 1;
384
385 return 0;
386 }
387
388 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)389 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
390 JSAMPARRAY input_data, JSAMPARRAY output_data)
391 {
392 if (simd_support & JSIMD_AVX2)
393 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
394 compptr->v_samp_factor,
395 compptr->width_in_blocks, input_data,
396 output_data);
397 else if (simd_support & JSIMD_SSE2)
398 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
399 compptr->v_samp_factor,
400 compptr->width_in_blocks, input_data,
401 output_data);
402 else
403 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
404 compptr->v_samp_factor, compptr->width_in_blocks,
405 input_data, output_data);
406 }
407
408 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)409 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
410 JSAMPARRAY input_data, JSAMPARRAY output_data)
411 {
412 if (simd_support & JSIMD_AVX2)
413 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
414 compptr->v_samp_factor,
415 compptr->width_in_blocks, input_data,
416 output_data);
417 else if (simd_support & JSIMD_SSE2)
418 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
419 compptr->v_samp_factor,
420 compptr->width_in_blocks, input_data,
421 output_data);
422 else
423 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
424 compptr->v_samp_factor, compptr->width_in_blocks,
425 input_data, output_data);
426 }
427
428 GLOBAL(int)
jsimd_can_h2v2_upsample(void)429 jsimd_can_h2v2_upsample(void)
430 {
431 init_simd();
432
433 /* The code is optimised for these values only */
434 if (BITS_IN_JSAMPLE != 8)
435 return 0;
436 if (sizeof(JDIMENSION) != 4)
437 return 0;
438
439 if (simd_support & JSIMD_AVX2)
440 return 1;
441 if (simd_support & JSIMD_SSE2)
442 return 1;
443 if (simd_support & JSIMD_MMX)
444 return 1;
445
446 return 0;
447 }
448
449 GLOBAL(int)
jsimd_can_h2v1_upsample(void)450 jsimd_can_h2v1_upsample(void)
451 {
452 init_simd();
453
454 /* The code is optimised for these values only */
455 if (BITS_IN_JSAMPLE != 8)
456 return 0;
457 if (sizeof(JDIMENSION) != 4)
458 return 0;
459
460 if (simd_support & JSIMD_AVX2)
461 return 1;
462 if (simd_support & JSIMD_SSE2)
463 return 1;
464 if (simd_support & JSIMD_MMX)
465 return 1;
466
467 return 0;
468 }
469
470 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)471 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
472 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
473 {
474 if (simd_support & JSIMD_AVX2)
475 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
476 input_data, output_data_ptr);
477 else if (simd_support & JSIMD_SSE2)
478 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
479 input_data, output_data_ptr);
480 else
481 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
482 input_data, output_data_ptr);
483 }
484
485 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)486 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
487 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
488 {
489 if (simd_support & JSIMD_AVX2)
490 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
491 input_data, output_data_ptr);
492 else if (simd_support & JSIMD_SSE2)
493 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
494 input_data, output_data_ptr);
495 else
496 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
497 input_data, output_data_ptr);
498 }
499
500 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)501 jsimd_can_h2v2_fancy_upsample(void)
502 {
503 init_simd();
504
505 /* The code is optimised for these values only */
506 if (BITS_IN_JSAMPLE != 8)
507 return 0;
508 if (sizeof(JDIMENSION) != 4)
509 return 0;
510
511 if ((simd_support & JSIMD_AVX2) &&
512 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
513 return 1;
514 if ((simd_support & JSIMD_SSE2) &&
515 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
516 return 1;
517 if (simd_support & JSIMD_MMX)
518 return 1;
519
520 return 0;
521 }
522
523 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)524 jsimd_can_h2v1_fancy_upsample(void)
525 {
526 init_simd();
527
528 /* The code is optimised for these values only */
529 if (BITS_IN_JSAMPLE != 8)
530 return 0;
531 if (sizeof(JDIMENSION) != 4)
532 return 0;
533
534 if ((simd_support & JSIMD_AVX2) &&
535 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
536 return 1;
537 if ((simd_support & JSIMD_SSE2) &&
538 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
539 return 1;
540 if (simd_support & JSIMD_MMX)
541 return 1;
542
543 return 0;
544 }
545
546 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)547 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
548 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
549 {
550 if (simd_support & JSIMD_AVX2)
551 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
552 compptr->downsampled_width, input_data,
553 output_data_ptr);
554 else if (simd_support & JSIMD_SSE2)
555 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
556 compptr->downsampled_width, input_data,
557 output_data_ptr);
558 else
559 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
560 compptr->downsampled_width, input_data,
561 output_data_ptr);
562 }
563
564 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)565 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
566 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
567 {
568 if (simd_support & JSIMD_AVX2)
569 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
570 compptr->downsampled_width, input_data,
571 output_data_ptr);
572 else if (simd_support & JSIMD_SSE2)
573 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
574 compptr->downsampled_width, input_data,
575 output_data_ptr);
576 else
577 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
578 compptr->downsampled_width, input_data,
579 output_data_ptr);
580 }
581
582 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)583 jsimd_can_h2v2_merged_upsample(void)
584 {
585 init_simd();
586
587 /* The code is optimised for these values only */
588 if (BITS_IN_JSAMPLE != 8)
589 return 0;
590 if (sizeof(JDIMENSION) != 4)
591 return 0;
592
593 if ((simd_support & JSIMD_AVX2) &&
594 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
595 return 1;
596 if ((simd_support & JSIMD_SSE2) &&
597 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
598 return 1;
599 if (simd_support & JSIMD_MMX)
600 return 1;
601
602 return 0;
603 }
604
605 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)606 jsimd_can_h2v1_merged_upsample(void)
607 {
608 init_simd();
609
610 /* The code is optimised for these values only */
611 if (BITS_IN_JSAMPLE != 8)
612 return 0;
613 if (sizeof(JDIMENSION) != 4)
614 return 0;
615
616 if ((simd_support & JSIMD_AVX2) &&
617 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
618 return 1;
619 if ((simd_support & JSIMD_SSE2) &&
620 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
621 return 1;
622 if (simd_support & JSIMD_MMX)
623 return 1;
624
625 return 0;
626 }
627
628 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)629 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
630 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
631 {
632 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
633 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
634 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
635
636 switch (cinfo->out_color_space) {
637 case JCS_EXT_RGB:
638 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
639 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
640 mmxfct = jsimd_h2v2_extrgb_merged_upsample_mmx;
641 break;
642 case JCS_EXT_RGBX:
643 case JCS_EXT_RGBA:
644 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
645 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
646 mmxfct = jsimd_h2v2_extrgbx_merged_upsample_mmx;
647 break;
648 case JCS_EXT_BGR:
649 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
650 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
651 mmxfct = jsimd_h2v2_extbgr_merged_upsample_mmx;
652 break;
653 case JCS_EXT_BGRX:
654 case JCS_EXT_BGRA:
655 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
656 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
657 mmxfct = jsimd_h2v2_extbgrx_merged_upsample_mmx;
658 break;
659 case JCS_EXT_XBGR:
660 case JCS_EXT_ABGR:
661 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
662 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
663 mmxfct = jsimd_h2v2_extxbgr_merged_upsample_mmx;
664 break;
665 case JCS_EXT_XRGB:
666 case JCS_EXT_ARGB:
667 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
668 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
669 mmxfct = jsimd_h2v2_extxrgb_merged_upsample_mmx;
670 break;
671 default:
672 avx2fct = jsimd_h2v2_merged_upsample_avx2;
673 sse2fct = jsimd_h2v2_merged_upsample_sse2;
674 mmxfct = jsimd_h2v2_merged_upsample_mmx;
675 break;
676 }
677
678 if (simd_support & JSIMD_AVX2)
679 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
680 else if (simd_support & JSIMD_SSE2)
681 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
682 else
683 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
684 }
685
686 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)687 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
688 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
689 {
690 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
691 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
692 void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
693
694 switch (cinfo->out_color_space) {
695 case JCS_EXT_RGB:
696 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
697 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
698 mmxfct = jsimd_h2v1_extrgb_merged_upsample_mmx;
699 break;
700 case JCS_EXT_RGBX:
701 case JCS_EXT_RGBA:
702 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
703 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
704 mmxfct = jsimd_h2v1_extrgbx_merged_upsample_mmx;
705 break;
706 case JCS_EXT_BGR:
707 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
708 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
709 mmxfct = jsimd_h2v1_extbgr_merged_upsample_mmx;
710 break;
711 case JCS_EXT_BGRX:
712 case JCS_EXT_BGRA:
713 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
714 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
715 mmxfct = jsimd_h2v1_extbgrx_merged_upsample_mmx;
716 break;
717 case JCS_EXT_XBGR:
718 case JCS_EXT_ABGR:
719 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
720 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
721 mmxfct = jsimd_h2v1_extxbgr_merged_upsample_mmx;
722 break;
723 case JCS_EXT_XRGB:
724 case JCS_EXT_ARGB:
725 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
726 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
727 mmxfct = jsimd_h2v1_extxrgb_merged_upsample_mmx;
728 break;
729 default:
730 avx2fct = jsimd_h2v1_merged_upsample_avx2;
731 sse2fct = jsimd_h2v1_merged_upsample_sse2;
732 mmxfct = jsimd_h2v1_merged_upsample_mmx;
733 break;
734 }
735
736 if (simd_support & JSIMD_AVX2)
737 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
738 else if (simd_support & JSIMD_SSE2)
739 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
740 else
741 mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
742 }
743
744 GLOBAL(int)
jsimd_can_convsamp(void)745 jsimd_can_convsamp(void)
746 {
747 init_simd();
748
749 /* The code is optimised for these values only */
750 if (DCTSIZE != 8)
751 return 0;
752 if (BITS_IN_JSAMPLE != 8)
753 return 0;
754 if (sizeof(JDIMENSION) != 4)
755 return 0;
756 if (sizeof(DCTELEM) != 2)
757 return 0;
758
759 if (simd_support & JSIMD_AVX2)
760 return 1;
761 if (simd_support & JSIMD_SSE2)
762 return 1;
763 if (simd_support & JSIMD_MMX)
764 return 1;
765
766 return 0;
767 }
768
769 GLOBAL(int)
jsimd_can_convsamp_float(void)770 jsimd_can_convsamp_float(void)
771 {
772 init_simd();
773
774 /* The code is optimised for these values only */
775 if (DCTSIZE != 8)
776 return 0;
777 if (BITS_IN_JSAMPLE != 8)
778 return 0;
779 if (sizeof(JDIMENSION) != 4)
780 return 0;
781 if (sizeof(FAST_FLOAT) != 4)
782 return 0;
783
784 if (simd_support & JSIMD_SSE2)
785 return 1;
786 if (simd_support & JSIMD_SSE)
787 return 1;
788 if (simd_support & JSIMD_3DNOW)
789 return 1;
790
791 return 0;
792 }
793
794 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)795 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
796 DCTELEM *workspace)
797 {
798 if (simd_support & JSIMD_AVX2)
799 jsimd_convsamp_avx2(sample_data, start_col, workspace);
800 else if (simd_support & JSIMD_SSE2)
801 jsimd_convsamp_sse2(sample_data, start_col, workspace);
802 else
803 jsimd_convsamp_mmx(sample_data, start_col, workspace);
804 }
805
806 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)807 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
808 FAST_FLOAT *workspace)
809 {
810 if (simd_support & JSIMD_SSE2)
811 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
812 else if (simd_support & JSIMD_SSE)
813 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
814 else
815 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
816 }
817
818 GLOBAL(int)
jsimd_can_fdct_islow(void)819 jsimd_can_fdct_islow(void)
820 {
821 init_simd();
822
823 /* The code is optimised for these values only */
824 if (DCTSIZE != 8)
825 return 0;
826 if (sizeof(DCTELEM) != 2)
827 return 0;
828
829 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
830 return 1;
831 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
832 return 1;
833 if (simd_support & JSIMD_MMX)
834 return 1;
835
836 return 0;
837 }
838
839 GLOBAL(int)
jsimd_can_fdct_ifast(void)840 jsimd_can_fdct_ifast(void)
841 {
842 init_simd();
843
844 /* The code is optimised for these values only */
845 if (DCTSIZE != 8)
846 return 0;
847 if (sizeof(DCTELEM) != 2)
848 return 0;
849
850 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
851 return 1;
852 if (simd_support & JSIMD_MMX)
853 return 1;
854
855 return 0;
856 }
857
858 GLOBAL(int)
jsimd_can_fdct_float(void)859 jsimd_can_fdct_float(void)
860 {
861 init_simd();
862
863 /* The code is optimised for these values only */
864 if (DCTSIZE != 8)
865 return 0;
866 if (sizeof(FAST_FLOAT) != 4)
867 return 0;
868
869 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
870 return 1;
871 if (simd_support & JSIMD_3DNOW)
872 return 1;
873
874 return 0;
875 }
876
877 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)878 jsimd_fdct_islow(DCTELEM *data)
879 {
880 if (simd_support & JSIMD_AVX2)
881 jsimd_fdct_islow_avx2(data);
882 else if (simd_support & JSIMD_SSE2)
883 jsimd_fdct_islow_sse2(data);
884 else
885 jsimd_fdct_islow_mmx(data);
886 }
887
888 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)889 jsimd_fdct_ifast(DCTELEM *data)
890 {
891 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
892 jsimd_fdct_ifast_sse2(data);
893 else
894 jsimd_fdct_ifast_mmx(data);
895 }
896
897 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)898 jsimd_fdct_float(FAST_FLOAT *data)
899 {
900 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
901 jsimd_fdct_float_sse(data);
902 else if (simd_support & JSIMD_3DNOW)
903 jsimd_fdct_float_3dnow(data);
904 }
905
906 GLOBAL(int)
jsimd_can_quantize(void)907 jsimd_can_quantize(void)
908 {
909 init_simd();
910
911 /* The code is optimised for these values only */
912 if (DCTSIZE != 8)
913 return 0;
914 if (sizeof(JCOEF) != 2)
915 return 0;
916 if (sizeof(DCTELEM) != 2)
917 return 0;
918
919 if (simd_support & JSIMD_AVX2)
920 return 1;
921 if (simd_support & JSIMD_SSE2)
922 return 1;
923 if (simd_support & JSIMD_MMX)
924 return 1;
925
926 return 0;
927 }
928
929 GLOBAL(int)
jsimd_can_quantize_float(void)930 jsimd_can_quantize_float(void)
931 {
932 init_simd();
933
934 /* The code is optimised for these values only */
935 if (DCTSIZE != 8)
936 return 0;
937 if (sizeof(JCOEF) != 2)
938 return 0;
939 if (sizeof(FAST_FLOAT) != 4)
940 return 0;
941
942 if (simd_support & JSIMD_SSE2)
943 return 1;
944 if (simd_support & JSIMD_SSE)
945 return 1;
946 if (simd_support & JSIMD_3DNOW)
947 return 1;
948
949 return 0;
950 }
951
952 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)953 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
954 {
955 if (simd_support & JSIMD_AVX2)
956 jsimd_quantize_avx2(coef_block, divisors, workspace);
957 else if (simd_support & JSIMD_SSE2)
958 jsimd_quantize_sse2(coef_block, divisors, workspace);
959 else
960 jsimd_quantize_mmx(coef_block, divisors, workspace);
961 }
962
963 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)964 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
965 FAST_FLOAT *workspace)
966 {
967 if (simd_support & JSIMD_SSE2)
968 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
969 else if (simd_support & JSIMD_SSE)
970 jsimd_quantize_float_sse(coef_block, divisors, workspace);
971 else
972 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
973 }
974
975 GLOBAL(int)
jsimd_can_idct_2x2(void)976 jsimd_can_idct_2x2(void)
977 {
978 init_simd();
979
980 /* The code is optimised for these values only */
981 if (DCTSIZE != 8)
982 return 0;
983 if (sizeof(JCOEF) != 2)
984 return 0;
985 if (BITS_IN_JSAMPLE != 8)
986 return 0;
987 if (sizeof(JDIMENSION) != 4)
988 return 0;
989 if (sizeof(ISLOW_MULT_TYPE) != 2)
990 return 0;
991
992 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
993 return 1;
994 if (simd_support & JSIMD_MMX)
995 return 1;
996
997 return 0;
998 }
999
1000 GLOBAL(int)
jsimd_can_idct_4x4(void)1001 jsimd_can_idct_4x4(void)
1002 {
1003 init_simd();
1004
1005 /* The code is optimised for these values only */
1006 if (DCTSIZE != 8)
1007 return 0;
1008 if (sizeof(JCOEF) != 2)
1009 return 0;
1010 if (BITS_IN_JSAMPLE != 8)
1011 return 0;
1012 if (sizeof(JDIMENSION) != 4)
1013 return 0;
1014 if (sizeof(ISLOW_MULT_TYPE) != 2)
1015 return 0;
1016
1017 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1018 return 1;
1019 if (simd_support & JSIMD_MMX)
1020 return 1;
1021
1022 return 0;
1023 }
1024
1025 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1026 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1027 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1028 JDIMENSION output_col)
1029 {
1030 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1031 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
1032 output_col);
1033 else
1034 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1035 }
1036
1037 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1038 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1039 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1040 JDIMENSION output_col)
1041 {
1042 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
1043 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
1044 output_col);
1045 else
1046 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1047 }
1048
1049 GLOBAL(int)
jsimd_can_idct_islow(void)1050 jsimd_can_idct_islow(void)
1051 {
1052 init_simd();
1053
1054 /* The code is optimised for these values only */
1055 if (DCTSIZE != 8)
1056 return 0;
1057 if (sizeof(JCOEF) != 2)
1058 return 0;
1059 if (BITS_IN_JSAMPLE != 8)
1060 return 0;
1061 if (sizeof(JDIMENSION) != 4)
1062 return 0;
1063 if (sizeof(ISLOW_MULT_TYPE) != 2)
1064 return 0;
1065
1066 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
1067 return 1;
1068 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1069 return 1;
1070 if (simd_support & JSIMD_MMX)
1071 return 1;
1072
1073 return 0;
1074 }
1075
1076 GLOBAL(int)
jsimd_can_idct_ifast(void)1077 jsimd_can_idct_ifast(void)
1078 {
1079 init_simd();
1080
1081 /* The code is optimised for these values only */
1082 if (DCTSIZE != 8)
1083 return 0;
1084 if (sizeof(JCOEF) != 2)
1085 return 0;
1086 if (BITS_IN_JSAMPLE != 8)
1087 return 0;
1088 if (sizeof(JDIMENSION) != 4)
1089 return 0;
1090 if (sizeof(IFAST_MULT_TYPE) != 2)
1091 return 0;
1092 if (IFAST_SCALE_BITS != 2)
1093 return 0;
1094
1095 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1096 return 1;
1097 if (simd_support & JSIMD_MMX)
1098 return 1;
1099
1100 return 0;
1101 }
1102
1103 GLOBAL(int)
jsimd_can_idct_float(void)1104 jsimd_can_idct_float(void)
1105 {
1106 init_simd();
1107
1108 if (DCTSIZE != 8)
1109 return 0;
1110 if (sizeof(JCOEF) != 2)
1111 return 0;
1112 if (BITS_IN_JSAMPLE != 8)
1113 return 0;
1114 if (sizeof(JDIMENSION) != 4)
1115 return 0;
1116 if (sizeof(FAST_FLOAT) != 4)
1117 return 0;
1118 if (sizeof(FLOAT_MULT_TYPE) != 4)
1119 return 0;
1120
1121 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1122 return 1;
1123 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1124 return 1;
1125 if (simd_support & JSIMD_3DNOW)
1126 return 1;
1127
1128 return 0;
1129 }
1130
1131 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1132 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1133 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1134 JDIMENSION output_col)
1135 {
1136 if (simd_support & JSIMD_AVX2)
1137 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1138 output_col);
1139 else if (simd_support & JSIMD_SSE2)
1140 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1141 output_col);
1142 else
1143 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
1144 output_col);
1145 }
1146
1147 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1148 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1149 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1150 JDIMENSION output_col)
1151 {
1152 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1153 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1154 output_col);
1155 else
1156 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
1157 output_col);
1158 }
1159
1160 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1161 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1162 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1163 JDIMENSION output_col)
1164 {
1165 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1166 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1167 output_col);
1168 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1169 jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1170 output_col);
1171 else
1172 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
1173 output_col);
1174 }
1175
1176 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1177 jsimd_can_huff_encode_one_block(void)
1178 {
1179 init_simd();
1180
1181 if (DCTSIZE != 8)
1182 return 0;
1183 if (sizeof(JCOEF) != 2)
1184 return 0;
1185
1186 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1187 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1188 return 1;
1189
1190 return 0;
1191 }
1192
1193 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1194 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1195 int last_dc_val, c_derived_tbl *dctbl,
1196 c_derived_tbl *actbl)
1197 {
1198 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1199 dctbl, actbl);
1200 }
1201
1202 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1203 jsimd_can_encode_mcu_AC_first_prepare(void)
1204 {
1205 init_simd();
1206
1207 if (DCTSIZE != 8)
1208 return 0;
1209 if (sizeof(JCOEF) != 2)
1210 return 0;
1211 if (SIZEOF_SIZE_T != 4)
1212 return 0;
1213 if (simd_support & JSIMD_SSE2)
1214 return 1;
1215
1216 return 0;
1217 }
1218
1219 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * zerobits)1220 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1221 const int *jpeg_natural_order_start, int Sl,
1222 int Al, JCOEF *values, size_t *zerobits)
1223 {
1224 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1225 Sl, Al, values, zerobits);
1226 }
1227
1228 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1229 jsimd_can_encode_mcu_AC_refine_prepare(void)
1230 {
1231 init_simd();
1232
1233 if (DCTSIZE != 8)
1234 return 0;
1235 if (sizeof(JCOEF) != 2)
1236 return 0;
1237 if (SIZEOF_SIZE_T != 4)
1238 return 0;
1239 if (simd_support & JSIMD_SSE2)
1240 return 1;
1241
1242 return 0;
1243 }
1244
1245 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)1246 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1247 const int *jpeg_natural_order_start, int Sl,
1248 int Al, JCOEF *absvalues, size_t *bits)
1249 {
1250 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1251 jpeg_natural_order_start,
1252 Sl, Al, absvalues, bits);
1253 }
1254