1 /*
2 * jsimd_x86_64.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
7 *
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11 *
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 64-bit x86 architecture.
15 */
16
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
23 #include "../jsimd.h"
24 #include "jconfigint.h"
25
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address `ptr` is a multiple of
 * 2^order bytes.  Both arguments are fully parenthesised so that an
 * expression such as `base + 1` is converted as a whole rather than having
 * the cast bind to `base` only, and the mask is built in size_t so the shift
 * is not limited to the width of int.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
34
/*
 * Bitmask of JSIMD_* flags.  The all-ones value means "not probed yet";
 * init_simd() replaces it on its first call.
 */
static unsigned int simd_support = ~0U;

/* Nonzero while SIMD Huffman encoding is allowed (cleared by init_simd()). */
static unsigned int simd_huffman = 1U;
37
38 /*
39 * Check what SIMD accelerations are supported.
40 *
41 * FIXME: This code is racy under a multi-threaded environment.
42 */
43 LOCAL(void)
init_simd(void)44 init_simd(void)
45 {
46 #ifndef NO_GETENV
47 char *env = NULL;
48 #endif
49
50 if (simd_support != ~0U)
51 return;
52
53 simd_support = jpeg_simd_cpu_support();
54
55 #ifndef NO_GETENV
56 /* Force different settings through environment variables */
57 env = getenv("JSIMD_FORCESSE2");
58 if ((env != NULL) && (strcmp(env, "1") == 0))
59 simd_support &= JSIMD_SSE2;
60 env = getenv("JSIMD_FORCEAVX2");
61 if ((env != NULL) && (strcmp(env, "1") == 0))
62 simd_support &= JSIMD_AVX2;
63 env = getenv("JSIMD_FORCENONE");
64 if ((env != NULL) && (strcmp(env, "1") == 0))
65 simd_support = 0;
66 env = getenv("JSIMD_NOHUFFENC");
67 if ((env != NULL) && (strcmp(env, "1") == 0))
68 simd_huffman = 0;
69 #endif
70 }
71
72 GLOBAL(int)
jsimd_can_rgb_ycc(void)73 jsimd_can_rgb_ycc(void)
74 {
75 init_simd();
76
77 /* The code is optimised for these values only */
78 if (BITS_IN_JSAMPLE != 8)
79 return 0;
80 if (sizeof(JDIMENSION) != 4)
81 return 0;
82 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
83 return 0;
84
85 if ((simd_support & JSIMD_AVX2) &&
86 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
87 return 1;
88 if ((simd_support & JSIMD_SSE2) &&
89 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
90 return 1;
91
92 return 0;
93 }
94
95 GLOBAL(int)
jsimd_can_rgb_gray(void)96 jsimd_can_rgb_gray(void)
97 {
98 init_simd();
99
100 /* The code is optimised for these values only */
101 if (BITS_IN_JSAMPLE != 8)
102 return 0;
103 if (sizeof(JDIMENSION) != 4)
104 return 0;
105 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
106 return 0;
107
108 if ((simd_support & JSIMD_AVX2) &&
109 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
110 return 1;
111 if ((simd_support & JSIMD_SSE2) &&
112 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
113 return 1;
114
115 return 0;
116 }
117
118 GLOBAL(int)
jsimd_can_ycc_rgb(void)119 jsimd_can_ycc_rgb(void)
120 {
121 init_simd();
122
123 /* The code is optimised for these values only */
124 if (BITS_IN_JSAMPLE != 8)
125 return 0;
126 if (sizeof(JDIMENSION) != 4)
127 return 0;
128 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
129 return 0;
130
131 if ((simd_support & JSIMD_AVX2) &&
132 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
133 return 1;
134 if ((simd_support & JSIMD_SSE2) &&
135 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
136 return 1;
137
138 return 0;
139 }
140
141 GLOBAL(int)
jsimd_can_ycc_rgb565(void)142 jsimd_can_ycc_rgb565(void)
143 {
144 return 0;
145 }
146
147 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)148 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
149 JSAMPIMAGE output_buf, JDIMENSION output_row,
150 int num_rows)
151 {
152 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
153 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
154
155 switch (cinfo->in_color_space) {
156 case JCS_EXT_RGB:
157 avx2fct = jsimd_extrgb_ycc_convert_avx2;
158 sse2fct = jsimd_extrgb_ycc_convert_sse2;
159 break;
160 case JCS_EXT_RGBX:
161 case JCS_EXT_RGBA:
162 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
163 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
164 break;
165 case JCS_EXT_BGR:
166 avx2fct = jsimd_extbgr_ycc_convert_avx2;
167 sse2fct = jsimd_extbgr_ycc_convert_sse2;
168 break;
169 case JCS_EXT_BGRX:
170 case JCS_EXT_BGRA:
171 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
172 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
173 break;
174 case JCS_EXT_XBGR:
175 case JCS_EXT_ABGR:
176 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
177 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
178 break;
179 case JCS_EXT_XRGB:
180 case JCS_EXT_ARGB:
181 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
182 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
183 break;
184 default:
185 avx2fct = jsimd_rgb_ycc_convert_avx2;
186 sse2fct = jsimd_rgb_ycc_convert_sse2;
187 break;
188 }
189
190 if (simd_support & JSIMD_AVX2)
191 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
192 else
193 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
194 }
195
196 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)197 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
198 JSAMPIMAGE output_buf, JDIMENSION output_row,
199 int num_rows)
200 {
201 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
202 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
203
204 switch (cinfo->in_color_space) {
205 case JCS_EXT_RGB:
206 avx2fct = jsimd_extrgb_gray_convert_avx2;
207 sse2fct = jsimd_extrgb_gray_convert_sse2;
208 break;
209 case JCS_EXT_RGBX:
210 case JCS_EXT_RGBA:
211 avx2fct = jsimd_extrgbx_gray_convert_avx2;
212 sse2fct = jsimd_extrgbx_gray_convert_sse2;
213 break;
214 case JCS_EXT_BGR:
215 avx2fct = jsimd_extbgr_gray_convert_avx2;
216 sse2fct = jsimd_extbgr_gray_convert_sse2;
217 break;
218 case JCS_EXT_BGRX:
219 case JCS_EXT_BGRA:
220 avx2fct = jsimd_extbgrx_gray_convert_avx2;
221 sse2fct = jsimd_extbgrx_gray_convert_sse2;
222 break;
223 case JCS_EXT_XBGR:
224 case JCS_EXT_ABGR:
225 avx2fct = jsimd_extxbgr_gray_convert_avx2;
226 sse2fct = jsimd_extxbgr_gray_convert_sse2;
227 break;
228 case JCS_EXT_XRGB:
229 case JCS_EXT_ARGB:
230 avx2fct = jsimd_extxrgb_gray_convert_avx2;
231 sse2fct = jsimd_extxrgb_gray_convert_sse2;
232 break;
233 default:
234 avx2fct = jsimd_rgb_gray_convert_avx2;
235 sse2fct = jsimd_rgb_gray_convert_sse2;
236 break;
237 }
238
239 if (simd_support & JSIMD_AVX2)
240 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
241 else
242 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
243 }
244
245 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)246 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
247 JDIMENSION input_row, JSAMPARRAY output_buf,
248 int num_rows)
249 {
250 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
251 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
252
253 switch (cinfo->out_color_space) {
254 case JCS_EXT_RGB:
255 avx2fct = jsimd_ycc_extrgb_convert_avx2;
256 sse2fct = jsimd_ycc_extrgb_convert_sse2;
257 break;
258 case JCS_EXT_RGBX:
259 case JCS_EXT_RGBA:
260 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
261 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
262 break;
263 case JCS_EXT_BGR:
264 avx2fct = jsimd_ycc_extbgr_convert_avx2;
265 sse2fct = jsimd_ycc_extbgr_convert_sse2;
266 break;
267 case JCS_EXT_BGRX:
268 case JCS_EXT_BGRA:
269 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
270 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
271 break;
272 case JCS_EXT_XBGR:
273 case JCS_EXT_ABGR:
274 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
275 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
276 break;
277 case JCS_EXT_XRGB:
278 case JCS_EXT_ARGB:
279 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
280 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
281 break;
282 default:
283 avx2fct = jsimd_ycc_rgb_convert_avx2;
284 sse2fct = jsimd_ycc_rgb_convert_sse2;
285 break;
286 }
287
288 if (simd_support & JSIMD_AVX2)
289 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
290 else
291 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292 }
293
294 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)295 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
296 JDIMENSION input_row, JSAMPARRAY output_buf,
297 int num_rows)
298 {
299 }
300
301 GLOBAL(int)
jsimd_can_h2v2_downsample(void)302 jsimd_can_h2v2_downsample(void)
303 {
304 init_simd();
305
306 /* The code is optimised for these values only */
307 if (BITS_IN_JSAMPLE != 8)
308 return 0;
309 if (sizeof(JDIMENSION) != 4)
310 return 0;
311
312 if (simd_support & JSIMD_AVX2)
313 return 1;
314 if (simd_support & JSIMD_SSE2)
315 return 1;
316
317 return 0;
318 }
319
320 GLOBAL(int)
jsimd_can_h2v1_downsample(void)321 jsimd_can_h2v1_downsample(void)
322 {
323 init_simd();
324
325 /* The code is optimised for these values only */
326 if (BITS_IN_JSAMPLE != 8)
327 return 0;
328 if (sizeof(JDIMENSION) != 4)
329 return 0;
330
331 if (simd_support & JSIMD_AVX2)
332 return 1;
333 if (simd_support & JSIMD_SSE2)
334 return 1;
335
336 return 0;
337 }
338
339 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)340 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
341 JSAMPARRAY input_data, JSAMPARRAY output_data)
342 {
343 if (simd_support & JSIMD_AVX2)
344 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
345 compptr->v_samp_factor,
346 compptr->width_in_blocks, input_data,
347 output_data);
348 else
349 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
350 compptr->v_samp_factor,
351 compptr->width_in_blocks, input_data,
352 output_data);
353 }
354
355 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)356 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
357 JSAMPARRAY input_data, JSAMPARRAY output_data)
358 {
359 if (simd_support & JSIMD_AVX2)
360 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
361 compptr->v_samp_factor,
362 compptr->width_in_blocks, input_data,
363 output_data);
364 else
365 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
366 compptr->v_samp_factor,
367 compptr->width_in_blocks, input_data,
368 output_data);
369 }
370
371 GLOBAL(int)
jsimd_can_h2v2_upsample(void)372 jsimd_can_h2v2_upsample(void)
373 {
374 init_simd();
375
376 /* The code is optimised for these values only */
377 if (BITS_IN_JSAMPLE != 8)
378 return 0;
379 if (sizeof(JDIMENSION) != 4)
380 return 0;
381
382 if (simd_support & JSIMD_AVX2)
383 return 1;
384 if (simd_support & JSIMD_SSE2)
385 return 1;
386
387 return 0;
388 }
389
390 GLOBAL(int)
jsimd_can_h2v1_upsample(void)391 jsimd_can_h2v1_upsample(void)
392 {
393 init_simd();
394
395 /* The code is optimised for these values only */
396 if (BITS_IN_JSAMPLE != 8)
397 return 0;
398 if (sizeof(JDIMENSION) != 4)
399 return 0;
400
401 if (simd_support & JSIMD_AVX2)
402 return 1;
403 if (simd_support & JSIMD_SSE2)
404 return 1;
405
406 return 0;
407 }
408
409 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)410 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
411 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
412 {
413 if (simd_support & JSIMD_AVX2)
414 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
415 input_data, output_data_ptr);
416 else
417 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
418 input_data, output_data_ptr);
419 }
420
421 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)422 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
423 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
424 {
425 if (simd_support & JSIMD_AVX2)
426 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
427 input_data, output_data_ptr);
428 else
429 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
430 input_data, output_data_ptr);
431 }
432
433 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)434 jsimd_can_h2v2_fancy_upsample(void)
435 {
436 init_simd();
437
438 /* The code is optimised for these values only */
439 if (BITS_IN_JSAMPLE != 8)
440 return 0;
441 if (sizeof(JDIMENSION) != 4)
442 return 0;
443
444 if ((simd_support & JSIMD_AVX2) &&
445 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
446 return 1;
447 if ((simd_support & JSIMD_SSE2) &&
448 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
449 return 1;
450
451 return 0;
452 }
453
454 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)455 jsimd_can_h2v1_fancy_upsample(void)
456 {
457 init_simd();
458
459 /* The code is optimised for these values only */
460 if (BITS_IN_JSAMPLE != 8)
461 return 0;
462 if (sizeof(JDIMENSION) != 4)
463 return 0;
464
465 if ((simd_support & JSIMD_AVX2) &&
466 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
467 return 1;
468 if ((simd_support & JSIMD_SSE2) &&
469 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
470 return 1;
471
472 return 0;
473 }
474
475 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)476 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
477 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
478 {
479 if (simd_support & JSIMD_AVX2)
480 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
481 compptr->downsampled_width, input_data,
482 output_data_ptr);
483 else
484 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
485 compptr->downsampled_width, input_data,
486 output_data_ptr);
487 }
488
489 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)490 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
491 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
492 {
493 if (simd_support & JSIMD_AVX2)
494 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
495 compptr->downsampled_width, input_data,
496 output_data_ptr);
497 else
498 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
499 compptr->downsampled_width, input_data,
500 output_data_ptr);
501 }
502
503 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)504 jsimd_can_h2v2_merged_upsample(void)
505 {
506 init_simd();
507
508 /* The code is optimised for these values only */
509 if (BITS_IN_JSAMPLE != 8)
510 return 0;
511 if (sizeof(JDIMENSION) != 4)
512 return 0;
513
514 if ((simd_support & JSIMD_AVX2) &&
515 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
516 return 1;
517 if ((simd_support & JSIMD_SSE2) &&
518 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
519 return 1;
520
521 return 0;
522 }
523
524 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)525 jsimd_can_h2v1_merged_upsample(void)
526 {
527 init_simd();
528
529 /* The code is optimised for these values only */
530 if (BITS_IN_JSAMPLE != 8)
531 return 0;
532 if (sizeof(JDIMENSION) != 4)
533 return 0;
534
535 if ((simd_support & JSIMD_AVX2) &&
536 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
537 return 1;
538 if ((simd_support & JSIMD_SSE2) &&
539 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
540 return 1;
541
542 return 0;
543 }
544
545 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)546 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
547 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
548 {
549 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
550 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
551
552 switch (cinfo->out_color_space) {
553 case JCS_EXT_RGB:
554 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
555 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
556 break;
557 case JCS_EXT_RGBX:
558 case JCS_EXT_RGBA:
559 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
560 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
561 break;
562 case JCS_EXT_BGR:
563 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
564 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
565 break;
566 case JCS_EXT_BGRX:
567 case JCS_EXT_BGRA:
568 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
569 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
570 break;
571 case JCS_EXT_XBGR:
572 case JCS_EXT_ABGR:
573 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
574 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
575 break;
576 case JCS_EXT_XRGB:
577 case JCS_EXT_ARGB:
578 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
579 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
580 break;
581 default:
582 avx2fct = jsimd_h2v2_merged_upsample_avx2;
583 sse2fct = jsimd_h2v2_merged_upsample_sse2;
584 break;
585 }
586
587 if (simd_support & JSIMD_AVX2)
588 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
589 else
590 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
591 }
592
593 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)594 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
595 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
596 {
597 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
598 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
599
600 switch (cinfo->out_color_space) {
601 case JCS_EXT_RGB:
602 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
603 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
604 break;
605 case JCS_EXT_RGBX:
606 case JCS_EXT_RGBA:
607 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
608 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
609 break;
610 case JCS_EXT_BGR:
611 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
612 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
613 break;
614 case JCS_EXT_BGRX:
615 case JCS_EXT_BGRA:
616 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
617 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
618 break;
619 case JCS_EXT_XBGR:
620 case JCS_EXT_ABGR:
621 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
622 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
623 break;
624 case JCS_EXT_XRGB:
625 case JCS_EXT_ARGB:
626 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
627 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
628 break;
629 default:
630 avx2fct = jsimd_h2v1_merged_upsample_avx2;
631 sse2fct = jsimd_h2v1_merged_upsample_sse2;
632 break;
633 }
634
635 if (simd_support & JSIMD_AVX2)
636 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
637 else
638 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
639 }
640
641 GLOBAL(int)
jsimd_can_convsamp(void)642 jsimd_can_convsamp(void)
643 {
644 init_simd();
645
646 /* The code is optimised for these values only */
647 if (DCTSIZE != 8)
648 return 0;
649 if (BITS_IN_JSAMPLE != 8)
650 return 0;
651 if (sizeof(JDIMENSION) != 4)
652 return 0;
653 if (sizeof(DCTELEM) != 2)
654 return 0;
655
656 if (simd_support & JSIMD_AVX2)
657 return 1;
658 if (simd_support & JSIMD_SSE2)
659 return 1;
660
661 return 0;
662 }
663
664 GLOBAL(int)
jsimd_can_convsamp_float(void)665 jsimd_can_convsamp_float(void)
666 {
667 init_simd();
668
669 /* The code is optimised for these values only */
670 if (DCTSIZE != 8)
671 return 0;
672 if (BITS_IN_JSAMPLE != 8)
673 return 0;
674 if (sizeof(JDIMENSION) != 4)
675 return 0;
676 if (sizeof(FAST_FLOAT) != 4)
677 return 0;
678
679 if (simd_support & JSIMD_SSE2)
680 return 1;
681
682 return 0;
683 }
684
685 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)686 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
687 DCTELEM *workspace)
688 {
689 if (simd_support & JSIMD_AVX2)
690 jsimd_convsamp_avx2(sample_data, start_col, workspace);
691 else
692 jsimd_convsamp_sse2(sample_data, start_col, workspace);
693 }
694
695 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)696 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
697 FAST_FLOAT *workspace)
698 {
699 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
700 }
701
702 GLOBAL(int)
jsimd_can_fdct_islow(void)703 jsimd_can_fdct_islow(void)
704 {
705 init_simd();
706
707 /* The code is optimised for these values only */
708 if (DCTSIZE != 8)
709 return 0;
710 if (sizeof(DCTELEM) != 2)
711 return 0;
712
713 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
714 return 1;
715 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
716 return 1;
717
718 return 0;
719 }
720
721 GLOBAL(int)
jsimd_can_fdct_ifast(void)722 jsimd_can_fdct_ifast(void)
723 {
724 init_simd();
725
726 /* The code is optimised for these values only */
727 if (DCTSIZE != 8)
728 return 0;
729 if (sizeof(DCTELEM) != 2)
730 return 0;
731
732 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
733 return 1;
734
735 return 0;
736 }
737
738 GLOBAL(int)
jsimd_can_fdct_float(void)739 jsimd_can_fdct_float(void)
740 {
741 init_simd();
742
743 /* The code is optimised for these values only */
744 if (DCTSIZE != 8)
745 return 0;
746 if (sizeof(FAST_FLOAT) != 4)
747 return 0;
748
749 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
750 return 1;
751
752 return 0;
753 }
754
755 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)756 jsimd_fdct_islow(DCTELEM *data)
757 {
758 if (simd_support & JSIMD_AVX2)
759 jsimd_fdct_islow_avx2(data);
760 else
761 jsimd_fdct_islow_sse2(data);
762 }
763
764 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)765 jsimd_fdct_ifast(DCTELEM *data)
766 {
767 jsimd_fdct_ifast_sse2(data);
768 }
769
770 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)771 jsimd_fdct_float(FAST_FLOAT *data)
772 {
773 jsimd_fdct_float_sse(data);
774 }
775
776 GLOBAL(int)
jsimd_can_quantize(void)777 jsimd_can_quantize(void)
778 {
779 init_simd();
780
781 /* The code is optimised for these values only */
782 if (DCTSIZE != 8)
783 return 0;
784 if (sizeof(JCOEF) != 2)
785 return 0;
786 if (sizeof(DCTELEM) != 2)
787 return 0;
788
789 if (simd_support & JSIMD_AVX2)
790 return 1;
791 if (simd_support & JSIMD_SSE2)
792 return 1;
793
794 return 0;
795 }
796
797 GLOBAL(int)
jsimd_can_quantize_float(void)798 jsimd_can_quantize_float(void)
799 {
800 init_simd();
801
802 /* The code is optimised for these values only */
803 if (DCTSIZE != 8)
804 return 0;
805 if (sizeof(JCOEF) != 2)
806 return 0;
807 if (sizeof(FAST_FLOAT) != 4)
808 return 0;
809
810 if (simd_support & JSIMD_SSE2)
811 return 1;
812
813 return 0;
814 }
815
816 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)817 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
818 {
819 if (simd_support & JSIMD_AVX2)
820 jsimd_quantize_avx2(coef_block, divisors, workspace);
821 else
822 jsimd_quantize_sse2(coef_block, divisors, workspace);
823 }
824
825 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)826 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
827 FAST_FLOAT *workspace)
828 {
829 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
830 }
831
832 GLOBAL(int)
jsimd_can_idct_2x2(void)833 jsimd_can_idct_2x2(void)
834 {
835 init_simd();
836
837 /* The code is optimised for these values only */
838 if (DCTSIZE != 8)
839 return 0;
840 if (sizeof(JCOEF) != 2)
841 return 0;
842 if (BITS_IN_JSAMPLE != 8)
843 return 0;
844 if (sizeof(JDIMENSION) != 4)
845 return 0;
846 if (sizeof(ISLOW_MULT_TYPE) != 2)
847 return 0;
848
849 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
850 return 1;
851
852 return 0;
853 }
854
855 GLOBAL(int)
jsimd_can_idct_4x4(void)856 jsimd_can_idct_4x4(void)
857 {
858 init_simd();
859
860 /* The code is optimised for these values only */
861 if (DCTSIZE != 8)
862 return 0;
863 if (sizeof(JCOEF) != 2)
864 return 0;
865 if (BITS_IN_JSAMPLE != 8)
866 return 0;
867 if (sizeof(JDIMENSION) != 4)
868 return 0;
869 if (sizeof(ISLOW_MULT_TYPE) != 2)
870 return 0;
871
872 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
873 return 1;
874
875 return 0;
876 }
877
878 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)879 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
880 JCOEFPTR coef_block, JSAMPARRAY output_buf,
881 JDIMENSION output_col)
882 {
883 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
884 }
885
886 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)887 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
888 JCOEFPTR coef_block, JSAMPARRAY output_buf,
889 JDIMENSION output_col)
890 {
891 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
892 }
893
894 GLOBAL(int)
jsimd_can_idct_islow(void)895 jsimd_can_idct_islow(void)
896 {
897 init_simd();
898
899 /* The code is optimised for these values only */
900 if (DCTSIZE != 8)
901 return 0;
902 if (sizeof(JCOEF) != 2)
903 return 0;
904 if (BITS_IN_JSAMPLE != 8)
905 return 0;
906 if (sizeof(JDIMENSION) != 4)
907 return 0;
908 if (sizeof(ISLOW_MULT_TYPE) != 2)
909 return 0;
910
911 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
912 return 1;
913 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
914 return 1;
915
916 return 0;
917 }
918
919 GLOBAL(int)
jsimd_can_idct_ifast(void)920 jsimd_can_idct_ifast(void)
921 {
922 init_simd();
923
924 /* The code is optimised for these values only */
925 if (DCTSIZE != 8)
926 return 0;
927 if (sizeof(JCOEF) != 2)
928 return 0;
929 if (BITS_IN_JSAMPLE != 8)
930 return 0;
931 if (sizeof(JDIMENSION) != 4)
932 return 0;
933 if (sizeof(IFAST_MULT_TYPE) != 2)
934 return 0;
935 if (IFAST_SCALE_BITS != 2)
936 return 0;
937
938 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
939 return 1;
940
941 return 0;
942 }
943
944 GLOBAL(int)
jsimd_can_idct_float(void)945 jsimd_can_idct_float(void)
946 {
947 init_simd();
948
949 if (DCTSIZE != 8)
950 return 0;
951 if (sizeof(JCOEF) != 2)
952 return 0;
953 if (BITS_IN_JSAMPLE != 8)
954 return 0;
955 if (sizeof(JDIMENSION) != 4)
956 return 0;
957 if (sizeof(FAST_FLOAT) != 4)
958 return 0;
959 if (sizeof(FLOAT_MULT_TYPE) != 4)
960 return 0;
961
962 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
963 return 1;
964
965 return 0;
966 }
967
968 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)969 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
970 JCOEFPTR coef_block, JSAMPARRAY output_buf,
971 JDIMENSION output_col)
972 {
973 if (simd_support & JSIMD_AVX2)
974 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
975 output_col);
976 else
977 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
978 output_col);
979 }
980
981 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)982 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
983 JCOEFPTR coef_block, JSAMPARRAY output_buf,
984 JDIMENSION output_col)
985 {
986 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
987 output_col);
988 }
989
990 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)991 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
992 JCOEFPTR coef_block, JSAMPARRAY output_buf,
993 JDIMENSION output_col)
994 {
995 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
996 output_col);
997 }
998
999 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1000 jsimd_can_huff_encode_one_block(void)
1001 {
1002 init_simd();
1003
1004 if (DCTSIZE != 8)
1005 return 0;
1006 if (sizeof(JCOEF) != 2)
1007 return 0;
1008
1009 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1010 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1011 return 1;
1012
1013 return 0;
1014 }
1015
1016 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1017 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1018 int last_dc_val, c_derived_tbl *dctbl,
1019 c_derived_tbl *actbl)
1020 {
1021 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1022 dctbl, actbl);
1023 }
1024
1025 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1026 jsimd_can_encode_mcu_AC_first_prepare(void)
1027 {
1028 init_simd();
1029
1030 if (DCTSIZE != 8)
1031 return 0;
1032 if (sizeof(JCOEF) != 2)
1033 return 0;
1034 if (simd_support & JSIMD_SSE2)
1035 return 1;
1036
1037 return 0;
1038 }
1039
1040 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * zerobits)1041 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1042 const int *jpeg_natural_order_start, int Sl,
1043 int Al, JCOEF *values, size_t *zerobits)
1044 {
1045 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1046 Sl, Al, values, zerobits);
1047 }
1048
1049 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1050 jsimd_can_encode_mcu_AC_refine_prepare(void)
1051 {
1052 init_simd();
1053
1054 if (DCTSIZE != 8)
1055 return 0;
1056 if (sizeof(JCOEF) != 2)
1057 return 0;
1058 if (simd_support & JSIMD_SSE2)
1059 return 1;
1060
1061 return 0;
1062 }
1063
1064 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)1065 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1066 const int *jpeg_natural_order_start, int Sl,
1067 int Al, JCOEF *absvalues, size_t *bits)
1068 {
1069 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1070 jpeg_natural_order_start,
1071 Sl, Al, absvalues, bits);
1072 }
1073