1 /*
2 * jsimd_arm64.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Copyright (C) 2020, Arm Limited.
9 *
10 * Based on the x86 SIMD extension for IJG JPEG library,
11 * Copyright (C) 1999-2006, MIYASAKA Masaru.
12 * For conditions of distribution and use, see copyright notice in jsimdext.inc
13 *
14 * This file contains the interface between the "normal" portions
15 * of the library and the SIMD implementations when running on a
16 * 64-bit Arm architecture.
17 */
18
19 #define JPEG_INTERNALS
20 #include "../../../jinclude.h"
21 #include "../../../jpeglib.h"
22 #include "../../../jsimd.h"
23 #include "../../../jdct.h"
24 #include "../../../jsimddct.h"
25 #include "../../jsimd.h"
26 #include "jconfigint.h"
27
28 #include <ctype.h>
29
/* Bit flags stored in simd_features. */
#define JSIMD_FASTLD3  1
#define JSIMD_FASTST3  2
#define JSIMD_FASTTBL  4

/* ~0U means "not probed yet"; init_simd() returns early for any other
   value. */
static unsigned int simd_support = ~0U;
/* Non-zero while the SIMD Huffman encoder may be used. */
static unsigned int simd_huffman = 1;
/* All fast paths are assumed until CPU detection or the environment clears
   them. */
static unsigned int simd_features =
  JSIMD_FASTLD3 | JSIMD_FASTST3 | JSIMD_FASTTBL;
38
39 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
40
41 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
42
/*
 * Test whether one line of /proc/cpuinfo starts with 'field' and contains
 * 'value' as a separate, whitespace-delimited word after the field name.
 *
 * buffer: NUL-terminated line to inspect.
 * field:  field name the line must begin with.
 * value:  word to look for; an empty string never matches.
 *
 * Returns 1 on a match, 0 otherwise.
 */
LOCAL(int)
check_cpuinfo(char *buffer, const char *field, char *value)
{
  const size_t field_len = strlen(field);
  const size_t value_len = strlen(value);
  const char *start, *hit;

  if (value_len == 0)
    return 0;
  if (strncmp(buffer, field, field_len) != 0)
    return 0;

  /* <ctype.h> classifiers require a value representable as unsigned char. */
  start = buffer + field_len;
  while (isspace((unsigned char)*start))
    start++;

  /* Check if 'value' is present in the buffer as a separate word.  The search
     always resumes just past the previous candidate while the left-boundary
     test stays anchored to 'start', so a candidate glued to a preceding
     non-space character can never be accepted. */
  for (hit = strstr(start, value); hit != NULL;
       hit = strstr(hit + 1, value)) {
    const char after = hit[value_len];
    const int left_ok = (hit == start) || isspace((unsigned char)hit[-1]);
    const int right_ok = (after == 0) || isspace((unsigned char)after);

    if (left_ok && right_ok)
      return 1;
  }
  return 0;
}
71
72 LOCAL(int)
parse_proc_cpuinfo(int bufsize)73 parse_proc_cpuinfo(int bufsize)
74 {
75 char *buffer = (char *)malloc(bufsize);
76 FILE *fd;
77
78 if (!buffer)
79 return 0;
80
81 fd = fopen("/proc/cpuinfo", "r");
82 if (fd) {
83 while (fgets(buffer, bufsize, fd)) {
84 if (!strchr(buffer, '\n') && !feof(fd)) {
85 /* "impossible" happened - insufficient size of the buffer! */
86 fclose(fd);
87 free(buffer);
88 return 0;
89 }
90 if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
91 check_cpuinfo(buffer, "CPU part", "0xd07"))
92 /* The Cortex-A53 has a slow tbl implementation. We can gain a few
93 percent speedup by disabling the use of that instruction. The
94 speedup on Cortex-A57 is more subtle but still measurable. */
95 simd_features &= ~JSIMD_FASTTBL;
96 else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
97 /* The SIMD version of Huffman encoding is slower than the C version on
98 Cavium ThunderX. Also, ld3 and st3 are abyssmally slow on that
99 CPU. */
100 simd_huffman = simd_features = 0;
101 }
102 fclose(fd);
103 }
104 free(buffer);
105 return 1;
106 }
107
108 #endif
109
110 /*
111 * Check what SIMD accelerations are supported.
112 *
113 * FIXME: This code is racy under a multi-threaded environment.
114 */
115
116 /*
117 * Armv8 architectures support Neon extensions by default.
118 * It is no longer optional as it was with Armv7.
119 */
120
121
122 LOCAL(void)
init_simd(void)123 init_simd(void)
124 {
125 #ifndef NO_GETENV
126 char env[2] = { 0 };
127 #endif
128 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
129 int bufsize = 1024; /* an initial guess for the line buffer size limit */
130 #endif
131
132 if (simd_support != ~0U)
133 return;
134
135 simd_support = 0;
136
137 simd_support |= JSIMD_NEON;
138 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
139 while (!parse_proc_cpuinfo(bufsize)) {
140 bufsize *= 2;
141 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
142 break;
143 }
144 #endif
145
146 #ifndef NO_GETENV
147 /* Force different settings through environment variables */
148 if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
149 simd_support = JSIMD_NEON;
150 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
151 simd_support = 0;
152 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
153 simd_huffman = 0;
154 if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "1"))
155 simd_features |= JSIMD_FASTLD3;
156 if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "0"))
157 simd_features &= ~JSIMD_FASTLD3;
158 if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "1"))
159 simd_features |= JSIMD_FASTST3;
160 if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "0"))
161 simd_features &= ~JSIMD_FASTST3;
162 #endif
163 }
164
165 GLOBAL(int)
jsimd_can_rgb_ycc(void)166 jsimd_can_rgb_ycc(void)
167 {
168 init_simd();
169
170 /* The code is optimised for these values only */
171 if (BITS_IN_JSAMPLE != 8)
172 return 0;
173 if (sizeof(JDIMENSION) != 4)
174 return 0;
175 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
176 return 0;
177
178 if (simd_support & JSIMD_NEON)
179 return 1;
180
181 return 0;
182 }
183
184 GLOBAL(int)
jsimd_can_rgb_gray(void)185 jsimd_can_rgb_gray(void)
186 {
187 init_simd();
188
189 /* The code is optimised for these values only */
190 if (BITS_IN_JSAMPLE != 8)
191 return 0;
192 if (sizeof(JDIMENSION) != 4)
193 return 0;
194 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
195 return 0;
196
197 if (simd_support & JSIMD_NEON)
198 return 1;
199
200 return 0;
201 }
202
203 GLOBAL(int)
jsimd_can_ycc_rgb(void)204 jsimd_can_ycc_rgb(void)
205 {
206 init_simd();
207
208 /* The code is optimised for these values only */
209 if (BITS_IN_JSAMPLE != 8)
210 return 0;
211 if (sizeof(JDIMENSION) != 4)
212 return 0;
213 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
214 return 0;
215
216 if (simd_support & JSIMD_NEON)
217 return 1;
218
219 return 0;
220 }
221
222 GLOBAL(int)
jsimd_can_ycc_rgb565(void)223 jsimd_can_ycc_rgb565(void)
224 {
225 init_simd();
226
227 /* The code is optimised for these values only */
228 if (BITS_IN_JSAMPLE != 8)
229 return 0;
230 if (sizeof(JDIMENSION) != 4)
231 return 0;
232
233 if (simd_support & JSIMD_NEON)
234 return 1;
235
236 return 0;
237 }
238
239 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)240 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
241 JSAMPIMAGE output_buf, JDIMENSION output_row,
242 int num_rows)
243 {
244 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
245
246 switch (cinfo->in_color_space) {
247 case JCS_EXT_RGB:
248 #ifndef NEON_INTRINSICS
249 if (simd_features & JSIMD_FASTLD3)
250 #endif
251 neonfct = jsimd_extrgb_ycc_convert_neon;
252 #ifndef NEON_INTRINSICS
253 else
254 neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
255 #endif
256 break;
257 case JCS_EXT_RGBX:
258 case JCS_EXT_RGBA:
259 neonfct = jsimd_extrgbx_ycc_convert_neon;
260 break;
261 case JCS_EXT_BGR:
262 #ifndef NEON_INTRINSICS
263 if (simd_features & JSIMD_FASTLD3)
264 #endif
265 neonfct = jsimd_extbgr_ycc_convert_neon;
266 #ifndef NEON_INTRINSICS
267 else
268 neonfct = jsimd_extbgr_ycc_convert_neon_slowld3;
269 #endif
270 break;
271 case JCS_EXT_BGRX:
272 case JCS_EXT_BGRA:
273 neonfct = jsimd_extbgrx_ycc_convert_neon;
274 break;
275 case JCS_EXT_XBGR:
276 case JCS_EXT_ABGR:
277 neonfct = jsimd_extxbgr_ycc_convert_neon;
278 break;
279 case JCS_EXT_XRGB:
280 case JCS_EXT_ARGB:
281 neonfct = jsimd_extxrgb_ycc_convert_neon;
282 break;
283 default:
284 #ifndef NEON_INTRINSICS
285 if (simd_features & JSIMD_FASTLD3)
286 #endif
287 neonfct = jsimd_extrgb_ycc_convert_neon;
288 #ifndef NEON_INTRINSICS
289 else
290 neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
291 #endif
292 break;
293 }
294
295 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
296 }
297
298 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)299 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
300 JSAMPIMAGE output_buf, JDIMENSION output_row,
301 int num_rows)
302 {
303 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
304
305 switch (cinfo->in_color_space) {
306 case JCS_EXT_RGB:
307 neonfct = jsimd_extrgb_gray_convert_neon;
308 break;
309 case JCS_EXT_RGBX:
310 case JCS_EXT_RGBA:
311 neonfct = jsimd_extrgbx_gray_convert_neon;
312 break;
313 case JCS_EXT_BGR:
314 neonfct = jsimd_extbgr_gray_convert_neon;
315 break;
316 case JCS_EXT_BGRX:
317 case JCS_EXT_BGRA:
318 neonfct = jsimd_extbgrx_gray_convert_neon;
319 break;
320 case JCS_EXT_XBGR:
321 case JCS_EXT_ABGR:
322 neonfct = jsimd_extxbgr_gray_convert_neon;
323 break;
324 case JCS_EXT_XRGB:
325 case JCS_EXT_ARGB:
326 neonfct = jsimd_extxrgb_gray_convert_neon;
327 break;
328 default:
329 neonfct = jsimd_extrgb_gray_convert_neon;
330 break;
331 }
332
333 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
334 }
335
336 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)337 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
338 JDIMENSION input_row, JSAMPARRAY output_buf,
339 int num_rows)
340 {
341 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
342
343 switch (cinfo->out_color_space) {
344 case JCS_EXT_RGB:
345 #ifndef NEON_INTRINSICS
346 if (simd_features & JSIMD_FASTST3)
347 #endif
348 neonfct = jsimd_ycc_extrgb_convert_neon;
349 #ifndef NEON_INTRINSICS
350 else
351 neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
352 #endif
353 break;
354 case JCS_EXT_RGBX:
355 case JCS_EXT_RGBA:
356 neonfct = jsimd_ycc_extrgbx_convert_neon;
357 break;
358 case JCS_EXT_BGR:
359 #ifndef NEON_INTRINSICS
360 if (simd_features & JSIMD_FASTST3)
361 #endif
362 neonfct = jsimd_ycc_extbgr_convert_neon;
363 #ifndef NEON_INTRINSICS
364 else
365 neonfct = jsimd_ycc_extbgr_convert_neon_slowst3;
366 #endif
367 break;
368 case JCS_EXT_BGRX:
369 case JCS_EXT_BGRA:
370 neonfct = jsimd_ycc_extbgrx_convert_neon;
371 break;
372 case JCS_EXT_XBGR:
373 case JCS_EXT_ABGR:
374 neonfct = jsimd_ycc_extxbgr_convert_neon;
375 break;
376 case JCS_EXT_XRGB:
377 case JCS_EXT_ARGB:
378 neonfct = jsimd_ycc_extxrgb_convert_neon;
379 break;
380 default:
381 #ifndef NEON_INTRINSICS
382 if (simd_features & JSIMD_FASTST3)
383 #endif
384 neonfct = jsimd_ycc_extrgb_convert_neon;
385 #ifndef NEON_INTRINSICS
386 else
387 neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
388 #endif
389 break;
390 }
391
392 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
393 }
394
395 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)396 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
397 JDIMENSION input_row, JSAMPARRAY output_buf,
398 int num_rows)
399 {
400 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
401 output_buf, num_rows);
402 }
403
404 GLOBAL(int)
jsimd_can_h2v2_downsample(void)405 jsimd_can_h2v2_downsample(void)
406 {
407 init_simd();
408
409 /* The code is optimised for these values only */
410 if (BITS_IN_JSAMPLE != 8)
411 return 0;
412 if (DCTSIZE != 8)
413 return 0;
414 if (sizeof(JDIMENSION) != 4)
415 return 0;
416
417 if (simd_support & JSIMD_NEON)
418 return 1;
419
420 return 0;
421 }
422
423 GLOBAL(int)
jsimd_can_h2v1_downsample(void)424 jsimd_can_h2v1_downsample(void)
425 {
426 init_simd();
427
428 /* The code is optimised for these values only */
429 if (BITS_IN_JSAMPLE != 8)
430 return 0;
431 if (DCTSIZE != 8)
432 return 0;
433 if (sizeof(JDIMENSION) != 4)
434 return 0;
435
436 if (simd_support & JSIMD_NEON)
437 return 1;
438
439 return 0;
440 }
441
442 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)443 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
444 JSAMPARRAY input_data, JSAMPARRAY output_data)
445 {
446 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
447 compptr->v_samp_factor, compptr->width_in_blocks,
448 input_data, output_data);
449 }
450
451 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)452 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
453 JSAMPARRAY input_data, JSAMPARRAY output_data)
454 {
455 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
456 compptr->v_samp_factor, compptr->width_in_blocks,
457 input_data, output_data);
458 }
459
460 GLOBAL(int)
jsimd_can_h2v2_upsample(void)461 jsimd_can_h2v2_upsample(void)
462 {
463 init_simd();
464
465 /* The code is optimised for these values only */
466 if (BITS_IN_JSAMPLE != 8)
467 return 0;
468 if (sizeof(JDIMENSION) != 4)
469 return 0;
470
471 if (simd_support & JSIMD_NEON)
472 return 1;
473
474 return 0;
475 }
476
477 GLOBAL(int)
jsimd_can_h2v1_upsample(void)478 jsimd_can_h2v1_upsample(void)
479 {
480 init_simd();
481
482 /* The code is optimised for these values only */
483 if (BITS_IN_JSAMPLE != 8)
484 return 0;
485 if (sizeof(JDIMENSION) != 4)
486 return 0;
487 if (simd_support & JSIMD_NEON)
488 return 1;
489
490 return 0;
491 }
492
493 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)494 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
495 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
496 {
497 jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
498 input_data, output_data_ptr);
499 }
500
501 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)502 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
503 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
504 {
505 jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
506 input_data, output_data_ptr);
507 }
508
509 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)510 jsimd_can_h2v2_fancy_upsample(void)
511 {
512 init_simd();
513
514 /* The code is optimised for these values only */
515 if (BITS_IN_JSAMPLE != 8)
516 return 0;
517 if (sizeof(JDIMENSION) != 4)
518 return 0;
519
520 if (simd_support & JSIMD_NEON)
521 return 1;
522
523 return 0;
524 }
525
526 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)527 jsimd_can_h2v1_fancy_upsample(void)
528 {
529 init_simd();
530
531 /* The code is optimised for these values only */
532 if (BITS_IN_JSAMPLE != 8)
533 return 0;
534 if (sizeof(JDIMENSION) != 4)
535 return 0;
536
537 if (simd_support & JSIMD_NEON)
538 return 1;
539
540 return 0;
541 }
542
543 GLOBAL(int)
jsimd_can_h1v2_fancy_upsample(void)544 jsimd_can_h1v2_fancy_upsample(void)
545 {
546 init_simd();
547
548 /* The code is optimised for these values only */
549 if (BITS_IN_JSAMPLE != 8)
550 return 0;
551 if (sizeof(JDIMENSION) != 4)
552 return 0;
553
554 if (simd_support & JSIMD_NEON)
555 return 1;
556
557 return 0;
558 }
559
560 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)561 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
562 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
563 {
564 jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
565 compptr->downsampled_width, input_data,
566 output_data_ptr);
567 }
568
569 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)570 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
571 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
572 {
573 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
574 compptr->downsampled_width, input_data,
575 output_data_ptr);
576 }
577
578 GLOBAL(void)
jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)579 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
580 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
581 {
582 jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
583 compptr->downsampled_width, input_data,
584 output_data_ptr);
585 }
586
587 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)588 jsimd_can_h2v2_merged_upsample(void)
589 {
590 init_simd();
591
592 /* The code is optimised for these values only */
593 if (BITS_IN_JSAMPLE != 8)
594 return 0;
595 if (sizeof(JDIMENSION) != 4)
596 return 0;
597
598 if (simd_support & JSIMD_NEON)
599 return 1;
600
601 return 0;
602 }
603
604 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)605 jsimd_can_h2v1_merged_upsample(void)
606 {
607 init_simd();
608
609 /* The code is optimised for these values only */
610 if (BITS_IN_JSAMPLE != 8)
611 return 0;
612 if (sizeof(JDIMENSION) != 4)
613 return 0;
614
615 if (simd_support & JSIMD_NEON)
616 return 1;
617
618 return 0;
619 }
620
621 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)622 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
623 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
624 {
625 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
626
627 switch (cinfo->out_color_space) {
628 case JCS_EXT_RGB:
629 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
630 break;
631 case JCS_EXT_RGBX:
632 case JCS_EXT_RGBA:
633 neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
634 break;
635 case JCS_EXT_BGR:
636 neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
637 break;
638 case JCS_EXT_BGRX:
639 case JCS_EXT_BGRA:
640 neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
641 break;
642 case JCS_EXT_XBGR:
643 case JCS_EXT_ABGR:
644 neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
645 break;
646 case JCS_EXT_XRGB:
647 case JCS_EXT_ARGB:
648 neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
649 break;
650 default:
651 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
652 break;
653 }
654
655 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
656 }
657
658 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)659 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
660 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
661 {
662 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
663
664 switch (cinfo->out_color_space) {
665 case JCS_EXT_RGB:
666 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
667 break;
668 case JCS_EXT_RGBX:
669 case JCS_EXT_RGBA:
670 neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
671 break;
672 case JCS_EXT_BGR:
673 neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
674 break;
675 case JCS_EXT_BGRX:
676 case JCS_EXT_BGRA:
677 neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
678 break;
679 case JCS_EXT_XBGR:
680 case JCS_EXT_ABGR:
681 neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
682 break;
683 case JCS_EXT_XRGB:
684 case JCS_EXT_ARGB:
685 neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
686 break;
687 default:
688 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
689 break;
690 }
691
692 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
693 }
694
695 GLOBAL(int)
jsimd_can_convsamp(void)696 jsimd_can_convsamp(void)
697 {
698 init_simd();
699
700 /* The code is optimised for these values only */
701 if (DCTSIZE != 8)
702 return 0;
703 if (BITS_IN_JSAMPLE != 8)
704 return 0;
705 if (sizeof(JDIMENSION) != 4)
706 return 0;
707 if (sizeof(DCTELEM) != 2)
708 return 0;
709
710 if (simd_support & JSIMD_NEON)
711 return 1;
712
713 return 0;
714 }
715
716 GLOBAL(int)
jsimd_can_convsamp_float(void)717 jsimd_can_convsamp_float(void)
718 {
719 return 0;
720 }
721
722 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)723 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
724 DCTELEM *workspace)
725 {
726 jsimd_convsamp_neon(sample_data, start_col, workspace);
727 }
728
729 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)730 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
731 FAST_FLOAT *workspace)
732 {
733 }
734
735 GLOBAL(int)
jsimd_can_fdct_islow(void)736 jsimd_can_fdct_islow(void)
737 {
738 init_simd();
739
740 /* The code is optimised for these values only */
741 if (DCTSIZE != 8)
742 return 0;
743 if (sizeof(DCTELEM) != 2)
744 return 0;
745
746 if (simd_support & JSIMD_NEON)
747 return 1;
748
749 return 0;
750 }
751
752 GLOBAL(int)
jsimd_can_fdct_ifast(void)753 jsimd_can_fdct_ifast(void)
754 {
755 init_simd();
756
757 /* The code is optimised for these values only */
758 if (DCTSIZE != 8)
759 return 0;
760 if (sizeof(DCTELEM) != 2)
761 return 0;
762
763 if (simd_support & JSIMD_NEON)
764 return 1;
765
766 return 0;
767 }
768
769 GLOBAL(int)
jsimd_can_fdct_float(void)770 jsimd_can_fdct_float(void)
771 {
772 return 0;
773 }
774
775 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)776 jsimd_fdct_islow(DCTELEM *data)
777 {
778 jsimd_fdct_islow_neon(data);
779 }
780
781 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)782 jsimd_fdct_ifast(DCTELEM *data)
783 {
784 jsimd_fdct_ifast_neon(data);
785 }
786
787 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)788 jsimd_fdct_float(FAST_FLOAT *data)
789 {
790 }
791
792 GLOBAL(int)
jsimd_can_quantize(void)793 jsimd_can_quantize(void)
794 {
795 init_simd();
796
797 /* The code is optimised for these values only */
798 if (DCTSIZE != 8)
799 return 0;
800 if (sizeof(JCOEF) != 2)
801 return 0;
802 if (sizeof(DCTELEM) != 2)
803 return 0;
804
805 if (simd_support & JSIMD_NEON)
806 return 1;
807
808 return 0;
809 }
810
811 GLOBAL(int)
jsimd_can_quantize_float(void)812 jsimd_can_quantize_float(void)
813 {
814 return 0;
815 }
816
817 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)818 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
819 {
820 jsimd_quantize_neon(coef_block, divisors, workspace);
821 }
822
823 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)824 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
825 FAST_FLOAT *workspace)
826 {
827 }
828
829 GLOBAL(int)
jsimd_can_idct_2x2(void)830 jsimd_can_idct_2x2(void)
831 {
832 init_simd();
833
834 /* The code is optimised for these values only */
835 if (DCTSIZE != 8)
836 return 0;
837 if (sizeof(JCOEF) != 2)
838 return 0;
839 if (BITS_IN_JSAMPLE != 8)
840 return 0;
841 if (sizeof(JDIMENSION) != 4)
842 return 0;
843 if (sizeof(ISLOW_MULT_TYPE) != 2)
844 return 0;
845
846 if (simd_support & JSIMD_NEON)
847 return 1;
848
849 return 0;
850 }
851
852 GLOBAL(int)
jsimd_can_idct_4x4(void)853 jsimd_can_idct_4x4(void)
854 {
855 init_simd();
856
857 /* The code is optimised for these values only */
858 if (DCTSIZE != 8)
859 return 0;
860 if (sizeof(JCOEF) != 2)
861 return 0;
862 if (BITS_IN_JSAMPLE != 8)
863 return 0;
864 if (sizeof(JDIMENSION) != 4)
865 return 0;
866 if (sizeof(ISLOW_MULT_TYPE) != 2)
867 return 0;
868
869 if (simd_support & JSIMD_NEON)
870 return 1;
871
872 return 0;
873 }
874
875 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)876 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
877 JCOEFPTR coef_block, JSAMPARRAY output_buf,
878 JDIMENSION output_col)
879 {
880 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
881 }
882
883 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)884 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
885 JCOEFPTR coef_block, JSAMPARRAY output_buf,
886 JDIMENSION output_col)
887 {
888 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
889 }
890
891 GLOBAL(int)
jsimd_can_idct_islow(void)892 jsimd_can_idct_islow(void)
893 {
894 init_simd();
895
896 /* The code is optimised for these values only */
897 if (DCTSIZE != 8)
898 return 0;
899 if (sizeof(JCOEF) != 2)
900 return 0;
901 if (BITS_IN_JSAMPLE != 8)
902 return 0;
903 if (sizeof(JDIMENSION) != 4)
904 return 0;
905 if (sizeof(ISLOW_MULT_TYPE) != 2)
906 return 0;
907
908 if (simd_support & JSIMD_NEON)
909 return 1;
910
911 return 0;
912 }
913
914 GLOBAL(int)
jsimd_can_idct_ifast(void)915 jsimd_can_idct_ifast(void)
916 {
917 init_simd();
918
919 /* The code is optimised for these values only */
920 if (DCTSIZE != 8)
921 return 0;
922 if (sizeof(JCOEF) != 2)
923 return 0;
924 if (BITS_IN_JSAMPLE != 8)
925 return 0;
926 if (sizeof(JDIMENSION) != 4)
927 return 0;
928 if (sizeof(IFAST_MULT_TYPE) != 2)
929 return 0;
930 if (IFAST_SCALE_BITS != 2)
931 return 0;
932
933 if (simd_support & JSIMD_NEON)
934 return 1;
935
936 return 0;
937 }
938
939 GLOBAL(int)
jsimd_can_idct_float(void)940 jsimd_can_idct_float(void)
941 {
942 return 0;
943 }
944
945 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)946 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
947 JCOEFPTR coef_block, JSAMPARRAY output_buf,
948 JDIMENSION output_col)
949 {
950 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
951 output_col);
952 }
953
954 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)955 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
956 JCOEFPTR coef_block, JSAMPARRAY output_buf,
957 JDIMENSION output_col)
958 {
959 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
960 output_col);
961 }
962
963 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)964 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
965 JCOEFPTR coef_block, JSAMPARRAY output_buf,
966 JDIMENSION output_col)
967 {
968 }
969
970 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)971 jsimd_can_huff_encode_one_block(void)
972 {
973 init_simd();
974
975 if (DCTSIZE != 8)
976 return 0;
977 if (sizeof(JCOEF) != 2)
978 return 0;
979
980 if (simd_support & JSIMD_NEON && simd_huffman)
981 return 1;
982
983 return 0;
984 }
985
986 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)987 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
988 int last_dc_val, c_derived_tbl *dctbl,
989 c_derived_tbl *actbl)
990 {
991 #ifndef NEON_INTRINSICS
992 if (simd_features & JSIMD_FASTTBL)
993 #endif
994 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
995 dctbl, actbl);
996 #ifndef NEON_INTRINSICS
997 else
998 return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
999 last_dc_val, dctbl, actbl);
1000 #endif
1001 }
1002
1003 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1004 jsimd_can_encode_mcu_AC_first_prepare(void)
1005 {
1006 init_simd();
1007
1008 if (DCTSIZE != 8)
1009 return 0;
1010 if (sizeof(JCOEF) != 2)
1011 return 0;
1012 if (SIZEOF_SIZE_T != 8)
1013 return 0;
1014
1015 if (simd_support & JSIMD_NEON)
1016 return 1;
1017
1018 return 0;
1019 }
1020
1021 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * zerobits)1022 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1023 const int *jpeg_natural_order_start, int Sl,
1024 int Al, JCOEF *values, size_t *zerobits)
1025 {
1026 jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
1027 Sl, Al, values, zerobits);
1028 }
1029
1030 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1031 jsimd_can_encode_mcu_AC_refine_prepare(void)
1032 {
1033 init_simd();
1034
1035 if (DCTSIZE != 8)
1036 return 0;
1037 if (sizeof(JCOEF) != 2)
1038 return 0;
1039 if (SIZEOF_SIZE_T != 8)
1040 return 0;
1041
1042 if (simd_support & JSIMD_NEON)
1043 return 1;
1044
1045 return 0;
1046 }
1047
1048 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)1049 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1050 const int *jpeg_natural_order_start, int Sl,
1051 int Al, JCOEF *absvalues, size_t *bits)
1052 {
1053 return jsimd_encode_mcu_AC_refine_prepare_neon(block,
1054 jpeg_natural_order_start,
1055 Sl, Al, absvalues, bits);
1056 }
1057