/*
 * jsimd_x86_64.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on a
 * 64-bit x86 architecture.
 */
16
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
23 #include "../jsimd.h"
24 #include "jconfigint.h"
25
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the address held in ptr is a
 * multiple of 2^order bytes.  Both arguments are fully parenthesized so that
 * an expression such as `base + n` is evaluated as pointer arithmetic before
 * it is converted to an integer, and the mask is built in size_t so that it
 * has the same width as the converted address.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
34
/* Bit mask of usable SIMD extensions; all bits set means "not probed yet"
 * (see the check at the top of init_simd()). */
static unsigned int simd_support = ~0U;
/* Non-zero unless the JSIMD_NOHUFFENC environment variable is set to "1". */
static unsigned int simd_huffman = 1;
37
/*
 * Check what SIMD accelerations are supported.
 *
 * FIXME: This code is racy under a multi-threaded environment.
 */
43 LOCAL(void)
init_simd(void)44 init_simd(void)
45 {
46 #ifndef NO_GETENV
47 char env[2] = { 0 };
48 #endif
49
50 if (simd_support != ~0U)
51 return;
52
53 simd_support = jpeg_simd_cpu_support();
54
55 #ifndef NO_GETENV
56 /* Force different settings through environment variables */
57 if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
58 simd_support &= JSIMD_SSE2;
59 if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
60 simd_support &= JSIMD_AVX2;
61 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
62 simd_support = 0;
63 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
64 simd_huffman = 0;
65 #endif
66 }
67
68 GLOBAL(int)
jsimd_can_rgb_ycc(void)69 jsimd_can_rgb_ycc(void)
70 {
71 init_simd();
72
73 /* The code is optimised for these values only */
74 if (BITS_IN_JSAMPLE != 8)
75 return 0;
76 if (sizeof(JDIMENSION) != 4)
77 return 0;
78 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
79 return 0;
80
81 if ((simd_support & JSIMD_AVX2) &&
82 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
83 return 1;
84 if ((simd_support & JSIMD_SSE2) &&
85 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
86 return 1;
87
88 return 0;
89 }
90
91 GLOBAL(int)
jsimd_can_rgb_gray(void)92 jsimd_can_rgb_gray(void)
93 {
94 init_simd();
95
96 /* The code is optimised for these values only */
97 if (BITS_IN_JSAMPLE != 8)
98 return 0;
99 if (sizeof(JDIMENSION) != 4)
100 return 0;
101 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
102 return 0;
103
104 if ((simd_support & JSIMD_AVX2) &&
105 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
106 return 1;
107 if ((simd_support & JSIMD_SSE2) &&
108 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
109 return 1;
110
111 return 0;
112 }
113
114 GLOBAL(int)
jsimd_can_ycc_rgb(void)115 jsimd_can_ycc_rgb(void)
116 {
117 init_simd();
118
119 /* The code is optimised for these values only */
120 if (BITS_IN_JSAMPLE != 8)
121 return 0;
122 if (sizeof(JDIMENSION) != 4)
123 return 0;
124 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
125 return 0;
126
127 if ((simd_support & JSIMD_AVX2) &&
128 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
129 return 1;
130 if ((simd_support & JSIMD_SSE2) &&
131 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
132 return 1;
133
134 return 0;
135 }
136
137 GLOBAL(int)
jsimd_can_ycc_rgb565(void)138 jsimd_can_ycc_rgb565(void)
139 {
140 return 0;
141 }
142
143 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)144 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
145 JSAMPIMAGE output_buf, JDIMENSION output_row,
146 int num_rows)
147 {
148 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
149 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
150
151 switch (cinfo->in_color_space) {
152 case JCS_EXT_RGB:
153 avx2fct = jsimd_extrgb_ycc_convert_avx2;
154 sse2fct = jsimd_extrgb_ycc_convert_sse2;
155 break;
156 case JCS_EXT_RGBX:
157 case JCS_EXT_RGBA:
158 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
159 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
160 break;
161 case JCS_EXT_BGR:
162 avx2fct = jsimd_extbgr_ycc_convert_avx2;
163 sse2fct = jsimd_extbgr_ycc_convert_sse2;
164 break;
165 case JCS_EXT_BGRX:
166 case JCS_EXT_BGRA:
167 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
168 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
169 break;
170 case JCS_EXT_XBGR:
171 case JCS_EXT_ABGR:
172 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
173 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
174 break;
175 case JCS_EXT_XRGB:
176 case JCS_EXT_ARGB:
177 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
178 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
179 break;
180 default:
181 avx2fct = jsimd_rgb_ycc_convert_avx2;
182 sse2fct = jsimd_rgb_ycc_convert_sse2;
183 break;
184 }
185
186 if (simd_support & JSIMD_AVX2)
187 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
188 else
189 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
190 }
191
192 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)193 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
194 JSAMPIMAGE output_buf, JDIMENSION output_row,
195 int num_rows)
196 {
197 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
198 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
199
200 switch (cinfo->in_color_space) {
201 case JCS_EXT_RGB:
202 avx2fct = jsimd_extrgb_gray_convert_avx2;
203 sse2fct = jsimd_extrgb_gray_convert_sse2;
204 break;
205 case JCS_EXT_RGBX:
206 case JCS_EXT_RGBA:
207 avx2fct = jsimd_extrgbx_gray_convert_avx2;
208 sse2fct = jsimd_extrgbx_gray_convert_sse2;
209 break;
210 case JCS_EXT_BGR:
211 avx2fct = jsimd_extbgr_gray_convert_avx2;
212 sse2fct = jsimd_extbgr_gray_convert_sse2;
213 break;
214 case JCS_EXT_BGRX:
215 case JCS_EXT_BGRA:
216 avx2fct = jsimd_extbgrx_gray_convert_avx2;
217 sse2fct = jsimd_extbgrx_gray_convert_sse2;
218 break;
219 case JCS_EXT_XBGR:
220 case JCS_EXT_ABGR:
221 avx2fct = jsimd_extxbgr_gray_convert_avx2;
222 sse2fct = jsimd_extxbgr_gray_convert_sse2;
223 break;
224 case JCS_EXT_XRGB:
225 case JCS_EXT_ARGB:
226 avx2fct = jsimd_extxrgb_gray_convert_avx2;
227 sse2fct = jsimd_extxrgb_gray_convert_sse2;
228 break;
229 default:
230 avx2fct = jsimd_rgb_gray_convert_avx2;
231 sse2fct = jsimd_rgb_gray_convert_sse2;
232 break;
233 }
234
235 if (simd_support & JSIMD_AVX2)
236 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
237 else
238 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
239 }
240
241 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)242 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
243 JDIMENSION input_row, JSAMPARRAY output_buf,
244 int num_rows)
245 {
246 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
247 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
248
249 switch (cinfo->out_color_space) {
250 case JCS_EXT_RGB:
251 avx2fct = jsimd_ycc_extrgb_convert_avx2;
252 sse2fct = jsimd_ycc_extrgb_convert_sse2;
253 break;
254 case JCS_EXT_RGBX:
255 case JCS_EXT_RGBA:
256 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
257 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
258 break;
259 case JCS_EXT_BGR:
260 avx2fct = jsimd_ycc_extbgr_convert_avx2;
261 sse2fct = jsimd_ycc_extbgr_convert_sse2;
262 break;
263 case JCS_EXT_BGRX:
264 case JCS_EXT_BGRA:
265 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
266 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
267 break;
268 case JCS_EXT_XBGR:
269 case JCS_EXT_ABGR:
270 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
271 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
272 break;
273 case JCS_EXT_XRGB:
274 case JCS_EXT_ARGB:
275 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
276 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
277 break;
278 default:
279 avx2fct = jsimd_ycc_rgb_convert_avx2;
280 sse2fct = jsimd_ycc_rgb_convert_sse2;
281 break;
282 }
283
284 if (simd_support & JSIMD_AVX2)
285 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
286 else
287 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
288 }
289
290 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)291 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
292 JDIMENSION input_row, JSAMPARRAY output_buf,
293 int num_rows)
294 {
295 }
296
297 GLOBAL(int)
jsimd_can_h2v2_downsample(void)298 jsimd_can_h2v2_downsample(void)
299 {
300 init_simd();
301
302 /* The code is optimised for these values only */
303 if (BITS_IN_JSAMPLE != 8)
304 return 0;
305 if (sizeof(JDIMENSION) != 4)
306 return 0;
307
308 if (simd_support & JSIMD_AVX2)
309 return 1;
310 if (simd_support & JSIMD_SSE2)
311 return 1;
312
313 return 0;
314 }
315
316 GLOBAL(int)
jsimd_can_h2v1_downsample(void)317 jsimd_can_h2v1_downsample(void)
318 {
319 init_simd();
320
321 /* The code is optimised for these values only */
322 if (BITS_IN_JSAMPLE != 8)
323 return 0;
324 if (sizeof(JDIMENSION) != 4)
325 return 0;
326
327 if (simd_support & JSIMD_AVX2)
328 return 1;
329 if (simd_support & JSIMD_SSE2)
330 return 1;
331
332 return 0;
333 }
334
335 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)336 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
337 JSAMPARRAY input_data, JSAMPARRAY output_data)
338 {
339 if (simd_support & JSIMD_AVX2)
340 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
341 compptr->v_samp_factor,
342 compptr->width_in_blocks, input_data,
343 output_data);
344 else
345 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
346 compptr->v_samp_factor,
347 compptr->width_in_blocks, input_data,
348 output_data);
349 }
350
351 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)352 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
353 JSAMPARRAY input_data, JSAMPARRAY output_data)
354 {
355 if (simd_support & JSIMD_AVX2)
356 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
357 compptr->v_samp_factor,
358 compptr->width_in_blocks, input_data,
359 output_data);
360 else
361 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
362 compptr->v_samp_factor,
363 compptr->width_in_blocks, input_data,
364 output_data);
365 }
366
367 GLOBAL(int)
jsimd_can_h2v2_upsample(void)368 jsimd_can_h2v2_upsample(void)
369 {
370 init_simd();
371
372 /* The code is optimised for these values only */
373 if (BITS_IN_JSAMPLE != 8)
374 return 0;
375 if (sizeof(JDIMENSION) != 4)
376 return 0;
377
378 if (simd_support & JSIMD_AVX2)
379 return 1;
380 if (simd_support & JSIMD_SSE2)
381 return 1;
382
383 return 0;
384 }
385
386 GLOBAL(int)
jsimd_can_h2v1_upsample(void)387 jsimd_can_h2v1_upsample(void)
388 {
389 init_simd();
390
391 /* The code is optimised for these values only */
392 if (BITS_IN_JSAMPLE != 8)
393 return 0;
394 if (sizeof(JDIMENSION) != 4)
395 return 0;
396
397 if (simd_support & JSIMD_AVX2)
398 return 1;
399 if (simd_support & JSIMD_SSE2)
400 return 1;
401
402 return 0;
403 }
404
405 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)406 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
407 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
408 {
409 if (simd_support & JSIMD_AVX2)
410 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
411 input_data, output_data_ptr);
412 else
413 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
414 input_data, output_data_ptr);
415 }
416
417 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)418 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
419 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
420 {
421 if (simd_support & JSIMD_AVX2)
422 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
423 input_data, output_data_ptr);
424 else
425 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
426 input_data, output_data_ptr);
427 }
428
429 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)430 jsimd_can_h2v2_fancy_upsample(void)
431 {
432 init_simd();
433
434 /* The code is optimised for these values only */
435 if (BITS_IN_JSAMPLE != 8)
436 return 0;
437 if (sizeof(JDIMENSION) != 4)
438 return 0;
439
440 if ((simd_support & JSIMD_AVX2) &&
441 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
442 return 1;
443 if ((simd_support & JSIMD_SSE2) &&
444 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
445 return 1;
446
447 return 0;
448 }
449
450 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)451 jsimd_can_h2v1_fancy_upsample(void)
452 {
453 init_simd();
454
455 /* The code is optimised for these values only */
456 if (BITS_IN_JSAMPLE != 8)
457 return 0;
458 if (sizeof(JDIMENSION) != 4)
459 return 0;
460
461 if ((simd_support & JSIMD_AVX2) &&
462 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
463 return 1;
464 if ((simd_support & JSIMD_SSE2) &&
465 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
466 return 1;
467
468 return 0;
469 }
470
471 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)472 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
473 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
474 {
475 if (simd_support & JSIMD_AVX2)
476 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
477 compptr->downsampled_width, input_data,
478 output_data_ptr);
479 else
480 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
481 compptr->downsampled_width, input_data,
482 output_data_ptr);
483 }
484
485 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)486 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
487 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
488 {
489 if (simd_support & JSIMD_AVX2)
490 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
491 compptr->downsampled_width, input_data,
492 output_data_ptr);
493 else
494 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
495 compptr->downsampled_width, input_data,
496 output_data_ptr);
497 }
498
499 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)500 jsimd_can_h2v2_merged_upsample(void)
501 {
502 init_simd();
503
504 /* The code is optimised for these values only */
505 if (BITS_IN_JSAMPLE != 8)
506 return 0;
507 if (sizeof(JDIMENSION) != 4)
508 return 0;
509
510 if ((simd_support & JSIMD_AVX2) &&
511 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
512 return 1;
513 if ((simd_support & JSIMD_SSE2) &&
514 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
515 return 1;
516
517 return 0;
518 }
519
520 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)521 jsimd_can_h2v1_merged_upsample(void)
522 {
523 init_simd();
524
525 /* The code is optimised for these values only */
526 if (BITS_IN_JSAMPLE != 8)
527 return 0;
528 if (sizeof(JDIMENSION) != 4)
529 return 0;
530
531 if ((simd_support & JSIMD_AVX2) &&
532 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
533 return 1;
534 if ((simd_support & JSIMD_SSE2) &&
535 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
536 return 1;
537
538 return 0;
539 }
540
541 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)542 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
543 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
544 {
545 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
546 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
547
548 switch (cinfo->out_color_space) {
549 case JCS_EXT_RGB:
550 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
551 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
552 break;
553 case JCS_EXT_RGBX:
554 case JCS_EXT_RGBA:
555 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
556 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
557 break;
558 case JCS_EXT_BGR:
559 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
560 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
561 break;
562 case JCS_EXT_BGRX:
563 case JCS_EXT_BGRA:
564 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
565 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
566 break;
567 case JCS_EXT_XBGR:
568 case JCS_EXT_ABGR:
569 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
570 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
571 break;
572 case JCS_EXT_XRGB:
573 case JCS_EXT_ARGB:
574 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
575 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
576 break;
577 default:
578 avx2fct = jsimd_h2v2_merged_upsample_avx2;
579 sse2fct = jsimd_h2v2_merged_upsample_sse2;
580 break;
581 }
582
583 if (simd_support & JSIMD_AVX2)
584 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
585 else
586 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
587 }
588
589 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)590 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
591 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
592 {
593 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
594 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
595
596 switch (cinfo->out_color_space) {
597 case JCS_EXT_RGB:
598 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
599 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
600 break;
601 case JCS_EXT_RGBX:
602 case JCS_EXT_RGBA:
603 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
604 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
605 break;
606 case JCS_EXT_BGR:
607 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
608 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
609 break;
610 case JCS_EXT_BGRX:
611 case JCS_EXT_BGRA:
612 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
613 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
614 break;
615 case JCS_EXT_XBGR:
616 case JCS_EXT_ABGR:
617 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
618 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
619 break;
620 case JCS_EXT_XRGB:
621 case JCS_EXT_ARGB:
622 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
623 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
624 break;
625 default:
626 avx2fct = jsimd_h2v1_merged_upsample_avx2;
627 sse2fct = jsimd_h2v1_merged_upsample_sse2;
628 break;
629 }
630
631 if (simd_support & JSIMD_AVX2)
632 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
633 else
634 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
635 }
636
637 GLOBAL(int)
jsimd_can_convsamp(void)638 jsimd_can_convsamp(void)
639 {
640 init_simd();
641
642 /* The code is optimised for these values only */
643 if (DCTSIZE != 8)
644 return 0;
645 if (BITS_IN_JSAMPLE != 8)
646 return 0;
647 if (sizeof(JDIMENSION) != 4)
648 return 0;
649 if (sizeof(DCTELEM) != 2)
650 return 0;
651
652 if (simd_support & JSIMD_AVX2)
653 return 1;
654 if (simd_support & JSIMD_SSE2)
655 return 1;
656
657 return 0;
658 }
659
660 GLOBAL(int)
jsimd_can_convsamp_float(void)661 jsimd_can_convsamp_float(void)
662 {
663 init_simd();
664
665 /* The code is optimised for these values only */
666 if (DCTSIZE != 8)
667 return 0;
668 if (BITS_IN_JSAMPLE != 8)
669 return 0;
670 if (sizeof(JDIMENSION) != 4)
671 return 0;
672 if (sizeof(FAST_FLOAT) != 4)
673 return 0;
674
675 if (simd_support & JSIMD_SSE2)
676 return 1;
677
678 return 0;
679 }
680
681 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)682 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
683 DCTELEM *workspace)
684 {
685 if (simd_support & JSIMD_AVX2)
686 jsimd_convsamp_avx2(sample_data, start_col, workspace);
687 else
688 jsimd_convsamp_sse2(sample_data, start_col, workspace);
689 }
690
691 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)692 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
693 FAST_FLOAT *workspace)
694 {
695 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
696 }
697
698 GLOBAL(int)
jsimd_can_fdct_islow(void)699 jsimd_can_fdct_islow(void)
700 {
701 init_simd();
702
703 /* The code is optimised for these values only */
704 if (DCTSIZE != 8)
705 return 0;
706 if (sizeof(DCTELEM) != 2)
707 return 0;
708
709 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
710 return 1;
711 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
712 return 1;
713
714 return 0;
715 }
716
717 GLOBAL(int)
jsimd_can_fdct_ifast(void)718 jsimd_can_fdct_ifast(void)
719 {
720 init_simd();
721
722 /* The code is optimised for these values only */
723 if (DCTSIZE != 8)
724 return 0;
725 if (sizeof(DCTELEM) != 2)
726 return 0;
727
728 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
729 return 1;
730
731 return 0;
732 }
733
734 GLOBAL(int)
jsimd_can_fdct_float(void)735 jsimd_can_fdct_float(void)
736 {
737 init_simd();
738
739 /* The code is optimised for these values only */
740 if (DCTSIZE != 8)
741 return 0;
742 if (sizeof(FAST_FLOAT) != 4)
743 return 0;
744
745 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
746 return 1;
747
748 return 0;
749 }
750
751 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)752 jsimd_fdct_islow(DCTELEM *data)
753 {
754 if (simd_support & JSIMD_AVX2)
755 jsimd_fdct_islow_avx2(data);
756 else
757 jsimd_fdct_islow_sse2(data);
758 }
759
760 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)761 jsimd_fdct_ifast(DCTELEM *data)
762 {
763 jsimd_fdct_ifast_sse2(data);
764 }
765
766 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)767 jsimd_fdct_float(FAST_FLOAT *data)
768 {
769 jsimd_fdct_float_sse(data);
770 }
771
772 GLOBAL(int)
jsimd_can_quantize(void)773 jsimd_can_quantize(void)
774 {
775 init_simd();
776
777 /* The code is optimised for these values only */
778 if (DCTSIZE != 8)
779 return 0;
780 if (sizeof(JCOEF) != 2)
781 return 0;
782 if (sizeof(DCTELEM) != 2)
783 return 0;
784
785 if (simd_support & JSIMD_AVX2)
786 return 1;
787 if (simd_support & JSIMD_SSE2)
788 return 1;
789
790 return 0;
791 }
792
793 GLOBAL(int)
jsimd_can_quantize_float(void)794 jsimd_can_quantize_float(void)
795 {
796 init_simd();
797
798 /* The code is optimised for these values only */
799 if (DCTSIZE != 8)
800 return 0;
801 if (sizeof(JCOEF) != 2)
802 return 0;
803 if (sizeof(FAST_FLOAT) != 4)
804 return 0;
805
806 if (simd_support & JSIMD_SSE2)
807 return 1;
808
809 return 0;
810 }
811
812 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)813 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
814 {
815 if (simd_support & JSIMD_AVX2)
816 jsimd_quantize_avx2(coef_block, divisors, workspace);
817 else
818 jsimd_quantize_sse2(coef_block, divisors, workspace);
819 }
820
821 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)822 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
823 FAST_FLOAT *workspace)
824 {
825 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
826 }
827
828 GLOBAL(int)
jsimd_can_idct_2x2(void)829 jsimd_can_idct_2x2(void)
830 {
831 init_simd();
832
833 /* The code is optimised for these values only */
834 if (DCTSIZE != 8)
835 return 0;
836 if (sizeof(JCOEF) != 2)
837 return 0;
838 if (BITS_IN_JSAMPLE != 8)
839 return 0;
840 if (sizeof(JDIMENSION) != 4)
841 return 0;
842 if (sizeof(ISLOW_MULT_TYPE) != 2)
843 return 0;
844
845 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
846 return 1;
847
848 return 0;
849 }
850
851 GLOBAL(int)
jsimd_can_idct_4x4(void)852 jsimd_can_idct_4x4(void)
853 {
854 init_simd();
855
856 /* The code is optimised for these values only */
857 if (DCTSIZE != 8)
858 return 0;
859 if (sizeof(JCOEF) != 2)
860 return 0;
861 if (BITS_IN_JSAMPLE != 8)
862 return 0;
863 if (sizeof(JDIMENSION) != 4)
864 return 0;
865 if (sizeof(ISLOW_MULT_TYPE) != 2)
866 return 0;
867
868 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
869 return 1;
870
871 return 0;
872 }
873
874 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)875 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
876 JCOEFPTR coef_block, JSAMPARRAY output_buf,
877 JDIMENSION output_col)
878 {
879 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
880 }
881
882 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)883 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
884 JCOEFPTR coef_block, JSAMPARRAY output_buf,
885 JDIMENSION output_col)
886 {
887 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
888 }
889
890 GLOBAL(int)
jsimd_can_idct_islow(void)891 jsimd_can_idct_islow(void)
892 {
893 init_simd();
894
895 /* The code is optimised for these values only */
896 if (DCTSIZE != 8)
897 return 0;
898 if (sizeof(JCOEF) != 2)
899 return 0;
900 if (BITS_IN_JSAMPLE != 8)
901 return 0;
902 if (sizeof(JDIMENSION) != 4)
903 return 0;
904 if (sizeof(ISLOW_MULT_TYPE) != 2)
905 return 0;
906
907 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
908 return 1;
909 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
910 return 1;
911
912 return 0;
913 }
914
915 GLOBAL(int)
jsimd_can_idct_ifast(void)916 jsimd_can_idct_ifast(void)
917 {
918 init_simd();
919
920 /* The code is optimised for these values only */
921 if (DCTSIZE != 8)
922 return 0;
923 if (sizeof(JCOEF) != 2)
924 return 0;
925 if (BITS_IN_JSAMPLE != 8)
926 return 0;
927 if (sizeof(JDIMENSION) != 4)
928 return 0;
929 if (sizeof(IFAST_MULT_TYPE) != 2)
930 return 0;
931 if (IFAST_SCALE_BITS != 2)
932 return 0;
933
934 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
935 return 1;
936
937 return 0;
938 }
939
940 GLOBAL(int)
jsimd_can_idct_float(void)941 jsimd_can_idct_float(void)
942 {
943 init_simd();
944
945 if (DCTSIZE != 8)
946 return 0;
947 if (sizeof(JCOEF) != 2)
948 return 0;
949 if (BITS_IN_JSAMPLE != 8)
950 return 0;
951 if (sizeof(JDIMENSION) != 4)
952 return 0;
953 if (sizeof(FAST_FLOAT) != 4)
954 return 0;
955 if (sizeof(FLOAT_MULT_TYPE) != 4)
956 return 0;
957
958 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
959 return 1;
960
961 return 0;
962 }
963
964 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)965 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
966 JCOEFPTR coef_block, JSAMPARRAY output_buf,
967 JDIMENSION output_col)
968 {
969 if (simd_support & JSIMD_AVX2)
970 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
971 output_col);
972 else
973 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
974 output_col);
975 }
976
977 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)978 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
979 JCOEFPTR coef_block, JSAMPARRAY output_buf,
980 JDIMENSION output_col)
981 {
982 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
983 output_col);
984 }
985
986 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)987 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
988 JCOEFPTR coef_block, JSAMPARRAY output_buf,
989 JDIMENSION output_col)
990 {
991 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
992 output_col);
993 }
994
995 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)996 jsimd_can_huff_encode_one_block(void)
997 {
998 init_simd();
999
1000 if (DCTSIZE != 8)
1001 return 0;
1002 if (sizeof(JCOEF) != 2)
1003 return 0;
1004
1005 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1006 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1007 return 1;
1008
1009 return 0;
1010 }
1011
1012 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1013 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1014 int last_dc_val, c_derived_tbl *dctbl,
1015 c_derived_tbl *actbl)
1016 {
1017 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1018 dctbl, actbl);
1019 }
1020
1021 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1022 jsimd_can_encode_mcu_AC_first_prepare(void)
1023 {
1024 init_simd();
1025
1026 if (DCTSIZE != 8)
1027 return 0;
1028 if (sizeof(JCOEF) != 2)
1029 return 0;
1030 if (simd_support & JSIMD_SSE2)
1031 return 1;
1032
1033 return 0;
1034 }
1035
1036 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * zerobits)1037 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1038 const int *jpeg_natural_order_start, int Sl,
1039 int Al, JCOEF *values, size_t *zerobits)
1040 {
1041 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1042 Sl, Al, values, zerobits);
1043 }
1044
1045 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1046 jsimd_can_encode_mcu_AC_refine_prepare(void)
1047 {
1048 init_simd();
1049
1050 if (DCTSIZE != 8)
1051 return 0;
1052 if (sizeof(JCOEF) != 2)
1053 return 0;
1054 if (simd_support & JSIMD_SSE2)
1055 return 1;
1056
1057 return 0;
1058 }
1059
1060 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)1061 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1062 const int *jpeg_natural_order_start, int Sl,
1063 int Al, JCOEF *absvalues, size_t *bits)
1064 {
1065 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1066 jpeg_natural_order_start,
1067 Sl, Al, absvalues, bits);
1068 }
1069