1 /*
2 * jsimd_arm64.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8 * Copyright (C) 2020, Arm Limited.
9 *
10 * Based on the x86 SIMD extension for IJG JPEG library,
11 * Copyright (C) 1999-2006, MIYASAKA Masaru.
12 * For conditions of distribution and use, see copyright notice in jsimdext.inc
13 *
14 * This file contains the interface between the "normal" portions
15 * of the library and the SIMD implementations when running on a
16 * 64-bit Arm architecture.
17 */
18
19 #define JPEG_INTERNALS
20 #include "../../../jinclude.h"
21 #include "../../../jpeglib.h"
22 #include "../../../jsimd.h"
23 #include "../../../jdct.h"
24 #include "../../../jsimddct.h"
25 #include "../../jsimd.h"
26 #include "jconfigint.h"
27
28 #include <stdio.h>
29 #include <string.h>
30 #include <ctype.h>
31
/*
 * Bit flags kept in simd_features.  Each one selects the regular Neon
 * routine over its "slow" fallback variant in the dispatchers below:
 *   JSIMD_FASTLD3 - choose *_neon over *_neon_slowld3
 *   JSIMD_FASTST3 - choose *_neon over *_neon_slowst3
 *   JSIMD_FASTTBL - choose *_neon over *_neon_slowtbl
 */
#define JSIMD_FASTLD3  1
#define JSIMD_FASTST3  2
#define JSIMD_FASTTBL  4

/* All bits set means "init_simd() has not run yet". */
static unsigned int simd_support = ~0U;
/* Non-zero while SIMD Huffman encoding is permitted. */
static unsigned int simd_huffman = 1;
/* Every fast path is assumed available until detection says otherwise. */
static unsigned int simd_features =
  JSIMD_FASTLD3 | JSIMD_FASTST3 | JSIMD_FASTTBL;
40
41 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
42
43 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
44
/*
 * Test whether one line of /proc/cpuinfo-style text starts with 'field' and
 * contains 'value' as a separate, whitespace-delimited word after it.
 *
 * buffer  NUL-terminated line to examine
 * field   prefix the line must begin with
 * value   word to look for; an empty string never matches
 *
 * Returns 1 on a match, 0 otherwise.
 */
LOCAL(int)
check_cpuinfo(char *buffer, const char *field, char *value)
{
  const char *start, *p;
  size_t field_len, value_len;

  if (*value == 0)
    return 0;

  field_len = strlen(field);
  if (strncmp(buffer, field, field_len) != 0)
    return 0;

  /* <ctype.h> functions require a value representable as unsigned char. */
  start = buffer + field_len;
  while (isspace((unsigned char)*start))
    start++;

  value_len = strlen(value);

  /*
   * The left-hand boundary is always checked against the fixed 'start'
   * position.  Advancing the search base itself would eventually make a
   * rejected occurrence look like it sits at the beginning of the text and
   * thereby skip the preceding-character test.
   */
  for (p = strstr(start, value); p != NULL; p = strstr(p + 1, value)) {
    if (p > start && !isspace((unsigned char)p[-1]))
      continue;                 /* glued to a preceding character */
    if (p[value_len] != 0 && !isspace((unsigned char)p[value_len]))
      continue;                 /* glued to a following character */
    return 1;
  }
  return 0;
}
73
74 LOCAL(int)
parse_proc_cpuinfo(int bufsize)75 parse_proc_cpuinfo(int bufsize)
76 {
77 char *buffer = (char *)malloc(bufsize);
78 FILE *fd;
79
80 if (!buffer)
81 return 0;
82
83 fd = fopen("/proc/cpuinfo", "r");
84 if (fd) {
85 while (fgets(buffer, bufsize, fd)) {
86 if (!strchr(buffer, '\n') && !feof(fd)) {
87 /* "impossible" happened - insufficient size of the buffer! */
88 fclose(fd);
89 free(buffer);
90 return 0;
91 }
92 if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
93 check_cpuinfo(buffer, "CPU part", "0xd07"))
94 /* The Cortex-A53 has a slow tbl implementation. We can gain a few
95 percent speedup by disabling the use of that instruction. The
96 speedup on Cortex-A57 is more subtle but still measurable. */
97 simd_features &= ~JSIMD_FASTTBL;
98 else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
99 /* The SIMD version of Huffman encoding is slower than the C version on
100 Cavium ThunderX. Also, ld3 and st3 are abyssmally slow on that
101 CPU. */
102 simd_huffman = simd_features = 0;
103 }
104 fclose(fd);
105 }
106 free(buffer);
107 return 1;
108 }
109
110 #endif
111
112 /*
113 * Check what SIMD accelerations are supported.
114 *
115 * FIXME: This code is racy under a multi-threaded environment.
116 */
117
118 /*
119 * Armv8 architectures support Neon extensions by default.
120 * It is no longer optional as it was with Armv7.
121 */
122
123
124 LOCAL(void)
init_simd(void)125 init_simd(void)
126 {
127 #ifndef NO_GETENV
128 char *env = NULL;
129 #endif
130 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
131 int bufsize = 1024; /* an initial guess for the line buffer size limit */
132 #endif
133
134 if (simd_support != ~0U)
135 return;
136
137 simd_support = 0;
138
139 simd_support |= JSIMD_NEON;
140 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
141 while (!parse_proc_cpuinfo(bufsize)) {
142 bufsize *= 2;
143 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
144 break;
145 }
146 #endif
147
148 #ifndef NO_GETENV
149 /* Force different settings through environment variables */
150 env = getenv("JSIMD_FORCENEON");
151 if ((env != NULL) && (strcmp(env, "1") == 0))
152 simd_support = JSIMD_NEON;
153 env = getenv("JSIMD_FORCENONE");
154 if ((env != NULL) && (strcmp(env, "1") == 0))
155 simd_support = 0;
156 env = getenv("JSIMD_NOHUFFENC");
157 if ((env != NULL) && (strcmp(env, "1") == 0))
158 simd_huffman = 0;
159 env = getenv("JSIMD_FASTLD3");
160 if ((env != NULL) && (strcmp(env, "1") == 0))
161 simd_features |= JSIMD_FASTLD3;
162 if ((env != NULL) && (strcmp(env, "0") == 0))
163 simd_features &= ~JSIMD_FASTLD3;
164 env = getenv("JSIMD_FASTST3");
165 if ((env != NULL) && (strcmp(env, "1") == 0))
166 simd_features |= JSIMD_FASTST3;
167 if ((env != NULL) && (strcmp(env, "0") == 0))
168 simd_features &= ~JSIMD_FASTST3;
169 #endif
170 }
171
172 GLOBAL(int)
jsimd_can_rgb_ycc(void)173 jsimd_can_rgb_ycc(void)
174 {
175 init_simd();
176
177 /* The code is optimised for these values only */
178 if (BITS_IN_JSAMPLE != 8)
179 return 0;
180 if (sizeof(JDIMENSION) != 4)
181 return 0;
182 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
183 return 0;
184
185 if (simd_support & JSIMD_NEON)
186 return 1;
187
188 return 0;
189 }
190
191 GLOBAL(int)
jsimd_can_rgb_gray(void)192 jsimd_can_rgb_gray(void)
193 {
194 init_simd();
195
196 /* The code is optimised for these values only */
197 if (BITS_IN_JSAMPLE != 8)
198 return 0;
199 if (sizeof(JDIMENSION) != 4)
200 return 0;
201 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
202 return 0;
203
204 if (simd_support & JSIMD_NEON)
205 return 1;
206
207 return 0;
208 }
209
210 GLOBAL(int)
jsimd_can_ycc_rgb(void)211 jsimd_can_ycc_rgb(void)
212 {
213 init_simd();
214
215 /* The code is optimised for these values only */
216 if (BITS_IN_JSAMPLE != 8)
217 return 0;
218 if (sizeof(JDIMENSION) != 4)
219 return 0;
220 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
221 return 0;
222
223 if (simd_support & JSIMD_NEON)
224 return 1;
225
226 return 0;
227 }
228
229 GLOBAL(int)
jsimd_can_ycc_rgb565(void)230 jsimd_can_ycc_rgb565(void)
231 {
232 init_simd();
233
234 /* The code is optimised for these values only */
235 if (BITS_IN_JSAMPLE != 8)
236 return 0;
237 if (sizeof(JDIMENSION) != 4)
238 return 0;
239
240 if (simd_support & JSIMD_NEON)
241 return 1;
242
243 return 0;
244 }
245
246 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)247 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
248 JSAMPIMAGE output_buf, JDIMENSION output_row,
249 int num_rows)
250 {
251 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
252
253 switch (cinfo->in_color_space) {
254 case JCS_EXT_RGB:
255 #ifndef NEON_INTRINSICS
256 if (simd_features & JSIMD_FASTLD3)
257 #endif
258 neonfct = jsimd_extrgb_ycc_convert_neon;
259 #ifndef NEON_INTRINSICS
260 else
261 neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
262 #endif
263 break;
264 case JCS_EXT_RGBX:
265 case JCS_EXT_RGBA:
266 neonfct = jsimd_extrgbx_ycc_convert_neon;
267 break;
268 case JCS_EXT_BGR:
269 #ifndef NEON_INTRINSICS
270 if (simd_features & JSIMD_FASTLD3)
271 #endif
272 neonfct = jsimd_extbgr_ycc_convert_neon;
273 #ifndef NEON_INTRINSICS
274 else
275 neonfct = jsimd_extbgr_ycc_convert_neon_slowld3;
276 #endif
277 break;
278 case JCS_EXT_BGRX:
279 case JCS_EXT_BGRA:
280 neonfct = jsimd_extbgrx_ycc_convert_neon;
281 break;
282 case JCS_EXT_XBGR:
283 case JCS_EXT_ABGR:
284 neonfct = jsimd_extxbgr_ycc_convert_neon;
285 break;
286 case JCS_EXT_XRGB:
287 case JCS_EXT_ARGB:
288 neonfct = jsimd_extxrgb_ycc_convert_neon;
289 break;
290 default:
291 #ifndef NEON_INTRINSICS
292 if (simd_features & JSIMD_FASTLD3)
293 #endif
294 neonfct = jsimd_extrgb_ycc_convert_neon;
295 #ifndef NEON_INTRINSICS
296 else
297 neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
298 #endif
299 break;
300 }
301
302 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
303 }
304
305 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)306 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
307 JSAMPIMAGE output_buf, JDIMENSION output_row,
308 int num_rows)
309 {
310 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
311
312 switch (cinfo->in_color_space) {
313 case JCS_EXT_RGB:
314 neonfct = jsimd_extrgb_gray_convert_neon;
315 break;
316 case JCS_EXT_RGBX:
317 case JCS_EXT_RGBA:
318 neonfct = jsimd_extrgbx_gray_convert_neon;
319 break;
320 case JCS_EXT_BGR:
321 neonfct = jsimd_extbgr_gray_convert_neon;
322 break;
323 case JCS_EXT_BGRX:
324 case JCS_EXT_BGRA:
325 neonfct = jsimd_extbgrx_gray_convert_neon;
326 break;
327 case JCS_EXT_XBGR:
328 case JCS_EXT_ABGR:
329 neonfct = jsimd_extxbgr_gray_convert_neon;
330 break;
331 case JCS_EXT_XRGB:
332 case JCS_EXT_ARGB:
333 neonfct = jsimd_extxrgb_gray_convert_neon;
334 break;
335 default:
336 neonfct = jsimd_extrgb_gray_convert_neon;
337 break;
338 }
339
340 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
341 }
342
343 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)344 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
345 JDIMENSION input_row, JSAMPARRAY output_buf,
346 int num_rows)
347 {
348 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
349
350 switch (cinfo->out_color_space) {
351 case JCS_EXT_RGB:
352 #ifndef NEON_INTRINSICS
353 if (simd_features & JSIMD_FASTST3)
354 #endif
355 neonfct = jsimd_ycc_extrgb_convert_neon;
356 #ifndef NEON_INTRINSICS
357 else
358 neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
359 #endif
360 break;
361 case JCS_EXT_RGBX:
362 case JCS_EXT_RGBA:
363 neonfct = jsimd_ycc_extrgbx_convert_neon;
364 break;
365 case JCS_EXT_BGR:
366 #ifndef NEON_INTRINSICS
367 if (simd_features & JSIMD_FASTST3)
368 #endif
369 neonfct = jsimd_ycc_extbgr_convert_neon;
370 #ifndef NEON_INTRINSICS
371 else
372 neonfct = jsimd_ycc_extbgr_convert_neon_slowst3;
373 #endif
374 break;
375 case JCS_EXT_BGRX:
376 case JCS_EXT_BGRA:
377 neonfct = jsimd_ycc_extbgrx_convert_neon;
378 break;
379 case JCS_EXT_XBGR:
380 case JCS_EXT_ABGR:
381 neonfct = jsimd_ycc_extxbgr_convert_neon;
382 break;
383 case JCS_EXT_XRGB:
384 case JCS_EXT_ARGB:
385 neonfct = jsimd_ycc_extxrgb_convert_neon;
386 break;
387 default:
388 #ifndef NEON_INTRINSICS
389 if (simd_features & JSIMD_FASTST3)
390 #endif
391 neonfct = jsimd_ycc_extrgb_convert_neon;
392 #ifndef NEON_INTRINSICS
393 else
394 neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
395 #endif
396 break;
397 }
398
399 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
400 }
401
402 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)403 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
404 JDIMENSION input_row, JSAMPARRAY output_buf,
405 int num_rows)
406 {
407 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
408 output_buf, num_rows);
409 }
410
411 GLOBAL(int)
jsimd_can_h2v2_downsample(void)412 jsimd_can_h2v2_downsample(void)
413 {
414 init_simd();
415
416 /* The code is optimised for these values only */
417 if (BITS_IN_JSAMPLE != 8)
418 return 0;
419 if (DCTSIZE != 8)
420 return 0;
421 if (sizeof(JDIMENSION) != 4)
422 return 0;
423
424 if (simd_support & JSIMD_NEON)
425 return 1;
426
427 return 0;
428 }
429
430 GLOBAL(int)
jsimd_can_h2v1_downsample(void)431 jsimd_can_h2v1_downsample(void)
432 {
433 init_simd();
434
435 /* The code is optimised for these values only */
436 if (BITS_IN_JSAMPLE != 8)
437 return 0;
438 if (DCTSIZE != 8)
439 return 0;
440 if (sizeof(JDIMENSION) != 4)
441 return 0;
442
443 if (simd_support & JSIMD_NEON)
444 return 1;
445
446 return 0;
447 }
448
449 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)450 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
451 JSAMPARRAY input_data, JSAMPARRAY output_data)
452 {
453 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
454 compptr->v_samp_factor, compptr->width_in_blocks,
455 input_data, output_data);
456 }
457
458 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)459 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
460 JSAMPARRAY input_data, JSAMPARRAY output_data)
461 {
462 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
463 compptr->v_samp_factor, compptr->width_in_blocks,
464 input_data, output_data);
465 }
466
467 GLOBAL(int)
jsimd_can_h2v2_upsample(void)468 jsimd_can_h2v2_upsample(void)
469 {
470 init_simd();
471
472 /* The code is optimised for these values only */
473 if (BITS_IN_JSAMPLE != 8)
474 return 0;
475 if (sizeof(JDIMENSION) != 4)
476 return 0;
477
478 if (simd_support & JSIMD_NEON)
479 return 1;
480
481 return 0;
482 }
483
484 GLOBAL(int)
jsimd_can_h2v1_upsample(void)485 jsimd_can_h2v1_upsample(void)
486 {
487 init_simd();
488
489 /* The code is optimised for these values only */
490 if (BITS_IN_JSAMPLE != 8)
491 return 0;
492 if (sizeof(JDIMENSION) != 4)
493 return 0;
494 if (simd_support & JSIMD_NEON)
495 return 1;
496
497 return 0;
498 }
499
500 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)501 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
502 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
503 {
504 jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
505 input_data, output_data_ptr);
506 }
507
508 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)509 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
510 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
511 {
512 jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
513 input_data, output_data_ptr);
514 }
515
516 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)517 jsimd_can_h2v2_fancy_upsample(void)
518 {
519 init_simd();
520
521 /* The code is optimised for these values only */
522 if (BITS_IN_JSAMPLE != 8)
523 return 0;
524 if (sizeof(JDIMENSION) != 4)
525 return 0;
526
527 if (simd_support & JSIMD_NEON)
528 return 1;
529
530 return 0;
531 }
532
533 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)534 jsimd_can_h2v1_fancy_upsample(void)
535 {
536 init_simd();
537
538 /* The code is optimised for these values only */
539 if (BITS_IN_JSAMPLE != 8)
540 return 0;
541 if (sizeof(JDIMENSION) != 4)
542 return 0;
543
544 if (simd_support & JSIMD_NEON)
545 return 1;
546
547 return 0;
548 }
549
550 GLOBAL(int)
jsimd_can_h1v2_fancy_upsample(void)551 jsimd_can_h1v2_fancy_upsample(void)
552 {
553 init_simd();
554
555 /* The code is optimised for these values only */
556 if (BITS_IN_JSAMPLE != 8)
557 return 0;
558 if (sizeof(JDIMENSION) != 4)
559 return 0;
560
561 if (simd_support & JSIMD_NEON)
562 return 1;
563
564 return 0;
565 }
566
567 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)568 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
569 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
570 {
571 jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
572 compptr->downsampled_width, input_data,
573 output_data_ptr);
574 }
575
576 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)577 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
578 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
579 {
580 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
581 compptr->downsampled_width, input_data,
582 output_data_ptr);
583 }
584
585 GLOBAL(void)
jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)586 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
587 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
588 {
589 jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
590 compptr->downsampled_width, input_data,
591 output_data_ptr);
592 }
593
594 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)595 jsimd_can_h2v2_merged_upsample(void)
596 {
597 init_simd();
598
599 /* The code is optimised for these values only */
600 if (BITS_IN_JSAMPLE != 8)
601 return 0;
602 if (sizeof(JDIMENSION) != 4)
603 return 0;
604
605 if (simd_support & JSIMD_NEON)
606 return 1;
607
608 return 0;
609 }
610
611 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)612 jsimd_can_h2v1_merged_upsample(void)
613 {
614 init_simd();
615
616 /* The code is optimised for these values only */
617 if (BITS_IN_JSAMPLE != 8)
618 return 0;
619 if (sizeof(JDIMENSION) != 4)
620 return 0;
621
622 if (simd_support & JSIMD_NEON)
623 return 1;
624
625 return 0;
626 }
627
628 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)629 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
630 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
631 {
632 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
633
634 switch (cinfo->out_color_space) {
635 case JCS_EXT_RGB:
636 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
637 break;
638 case JCS_EXT_RGBX:
639 case JCS_EXT_RGBA:
640 neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
641 break;
642 case JCS_EXT_BGR:
643 neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
644 break;
645 case JCS_EXT_BGRX:
646 case JCS_EXT_BGRA:
647 neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
648 break;
649 case JCS_EXT_XBGR:
650 case JCS_EXT_ABGR:
651 neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
652 break;
653 case JCS_EXT_XRGB:
654 case JCS_EXT_ARGB:
655 neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
656 break;
657 default:
658 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
659 break;
660 }
661
662 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
663 }
664
665 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)666 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
667 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
668 {
669 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
670
671 switch (cinfo->out_color_space) {
672 case JCS_EXT_RGB:
673 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
674 break;
675 case JCS_EXT_RGBX:
676 case JCS_EXT_RGBA:
677 neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
678 break;
679 case JCS_EXT_BGR:
680 neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
681 break;
682 case JCS_EXT_BGRX:
683 case JCS_EXT_BGRA:
684 neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
685 break;
686 case JCS_EXT_XBGR:
687 case JCS_EXT_ABGR:
688 neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
689 break;
690 case JCS_EXT_XRGB:
691 case JCS_EXT_ARGB:
692 neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
693 break;
694 default:
695 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
696 break;
697 }
698
699 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
700 }
701
702 GLOBAL(int)
jsimd_can_convsamp(void)703 jsimd_can_convsamp(void)
704 {
705 init_simd();
706
707 /* The code is optimised for these values only */
708 if (DCTSIZE != 8)
709 return 0;
710 if (BITS_IN_JSAMPLE != 8)
711 return 0;
712 if (sizeof(JDIMENSION) != 4)
713 return 0;
714 if (sizeof(DCTELEM) != 2)
715 return 0;
716
717 if (simd_support & JSIMD_NEON)
718 return 1;
719
720 return 0;
721 }
722
/*
 * Floating-point sample conversion has no SIMD implementation in this
 * file; always returns 0.
 */
GLOBAL(int)
jsimd_can_convsamp_float(void)
{
  return 0;
}
728
729 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)730 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
731 DCTELEM *workspace)
732 {
733 jsimd_convsamp_neon(sample_data, start_col, workspace);
734 }
735
736 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)737 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
738 FAST_FLOAT *workspace)
739 {
740 }
741
742 GLOBAL(int)
jsimd_can_fdct_islow(void)743 jsimd_can_fdct_islow(void)
744 {
745 init_simd();
746
747 /* The code is optimised for these values only */
748 if (DCTSIZE != 8)
749 return 0;
750 if (sizeof(DCTELEM) != 2)
751 return 0;
752
753 if (simd_support & JSIMD_NEON)
754 return 1;
755
756 return 0;
757 }
758
759 GLOBAL(int)
jsimd_can_fdct_ifast(void)760 jsimd_can_fdct_ifast(void)
761 {
762 init_simd();
763
764 /* The code is optimised for these values only */
765 if (DCTSIZE != 8)
766 return 0;
767 if (sizeof(DCTELEM) != 2)
768 return 0;
769
770 if (simd_support & JSIMD_NEON)
771 return 1;
772
773 return 0;
774 }
775
/*
 * The floating-point forward DCT has no SIMD implementation in this file;
 * always returns 0.
 */
GLOBAL(int)
jsimd_can_fdct_float(void)
{
  return 0;
}
781
782 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)783 jsimd_fdct_islow(DCTELEM *data)
784 {
785 jsimd_fdct_islow_neon(data);
786 }
787
788 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)789 jsimd_fdct_ifast(DCTELEM *data)
790 {
791 jsimd_fdct_ifast_neon(data);
792 }
793
794 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)795 jsimd_fdct_float(FAST_FLOAT *data)
796 {
797 }
798
799 GLOBAL(int)
jsimd_can_quantize(void)800 jsimd_can_quantize(void)
801 {
802 init_simd();
803
804 /* The code is optimised for these values only */
805 if (DCTSIZE != 8)
806 return 0;
807 if (sizeof(JCOEF) != 2)
808 return 0;
809 if (sizeof(DCTELEM) != 2)
810 return 0;
811
812 if (simd_support & JSIMD_NEON)
813 return 1;
814
815 return 0;
816 }
817
/*
 * Floating-point quantization has no SIMD implementation in this file;
 * always returns 0.
 */
GLOBAL(int)
jsimd_can_quantize_float(void)
{
  return 0;
}
823
824 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)825 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
826 {
827 jsimd_quantize_neon(coef_block, divisors, workspace);
828 }
829
830 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)831 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
832 FAST_FLOAT *workspace)
833 {
834 }
835
836 GLOBAL(int)
jsimd_can_idct_2x2(void)837 jsimd_can_idct_2x2(void)
838 {
839 init_simd();
840
841 /* The code is optimised for these values only */
842 if (DCTSIZE != 8)
843 return 0;
844 if (sizeof(JCOEF) != 2)
845 return 0;
846 if (BITS_IN_JSAMPLE != 8)
847 return 0;
848 if (sizeof(JDIMENSION) != 4)
849 return 0;
850 if (sizeof(ISLOW_MULT_TYPE) != 2)
851 return 0;
852
853 if (simd_support & JSIMD_NEON)
854 return 1;
855
856 return 0;
857 }
858
859 GLOBAL(int)
jsimd_can_idct_4x4(void)860 jsimd_can_idct_4x4(void)
861 {
862 init_simd();
863
864 /* The code is optimised for these values only */
865 if (DCTSIZE != 8)
866 return 0;
867 if (sizeof(JCOEF) != 2)
868 return 0;
869 if (BITS_IN_JSAMPLE != 8)
870 return 0;
871 if (sizeof(JDIMENSION) != 4)
872 return 0;
873 if (sizeof(ISLOW_MULT_TYPE) != 2)
874 return 0;
875
876 if (simd_support & JSIMD_NEON)
877 return 1;
878
879 return 0;
880 }
881
882 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)883 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
884 JCOEFPTR coef_block, JSAMPARRAY output_buf,
885 JDIMENSION output_col)
886 {
887 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
888 }
889
890 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)891 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
892 JCOEFPTR coef_block, JSAMPARRAY output_buf,
893 JDIMENSION output_col)
894 {
895 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
896 }
897
898 GLOBAL(int)
jsimd_can_idct_islow(void)899 jsimd_can_idct_islow(void)
900 {
901 init_simd();
902
903 /* The code is optimised for these values only */
904 if (DCTSIZE != 8)
905 return 0;
906 if (sizeof(JCOEF) != 2)
907 return 0;
908 if (BITS_IN_JSAMPLE != 8)
909 return 0;
910 if (sizeof(JDIMENSION) != 4)
911 return 0;
912 if (sizeof(ISLOW_MULT_TYPE) != 2)
913 return 0;
914
915 if (simd_support & JSIMD_NEON)
916 return 1;
917
918 return 0;
919 }
920
921 GLOBAL(int)
jsimd_can_idct_ifast(void)922 jsimd_can_idct_ifast(void)
923 {
924 init_simd();
925
926 /* The code is optimised for these values only */
927 if (DCTSIZE != 8)
928 return 0;
929 if (sizeof(JCOEF) != 2)
930 return 0;
931 if (BITS_IN_JSAMPLE != 8)
932 return 0;
933 if (sizeof(JDIMENSION) != 4)
934 return 0;
935 if (sizeof(IFAST_MULT_TYPE) != 2)
936 return 0;
937 if (IFAST_SCALE_BITS != 2)
938 return 0;
939
940 if (simd_support & JSIMD_NEON)
941 return 1;
942
943 return 0;
944 }
945
/*
 * The floating-point inverse DCT has no SIMD implementation in this file;
 * always returns 0.
 */
GLOBAL(int)
jsimd_can_idct_float(void)
{
  return 0;
}
951
952 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)953 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
954 JCOEFPTR coef_block, JSAMPARRAY output_buf,
955 JDIMENSION output_col)
956 {
957 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
958 output_col);
959 }
960
961 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)962 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
963 JCOEFPTR coef_block, JSAMPARRAY output_buf,
964 JDIMENSION output_col)
965 {
966 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
967 output_col);
968 }
969
970 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)971 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
972 JCOEFPTR coef_block, JSAMPARRAY output_buf,
973 JDIMENSION output_col)
974 {
975 }
976
977 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)978 jsimd_can_huff_encode_one_block(void)
979 {
980 init_simd();
981
982 if (DCTSIZE != 8)
983 return 0;
984 if (sizeof(JCOEF) != 2)
985 return 0;
986
987 if (simd_support & JSIMD_NEON && simd_huffman)
988 return 1;
989
990 return 0;
991 }
992
993 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)994 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
995 int last_dc_val, c_derived_tbl *dctbl,
996 c_derived_tbl *actbl)
997 {
998 #ifndef NEON_INTRINSICS
999 if (simd_features & JSIMD_FASTTBL)
1000 #endif
1001 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
1002 dctbl, actbl);
1003 #ifndef NEON_INTRINSICS
1004 else
1005 return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
1006 last_dc_val, dctbl, actbl);
1007 #endif
1008 }
1009
1010 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1011 jsimd_can_encode_mcu_AC_first_prepare(void)
1012 {
1013 init_simd();
1014
1015 if (DCTSIZE != 8)
1016 return 0;
1017 if (sizeof(JCOEF) != 2)
1018 return 0;
1019 if (SIZEOF_SIZE_T != 8)
1020 return 0;
1021
1022 if (simd_support & JSIMD_NEON)
1023 return 1;
1024
1025 return 0;
1026 }
1027
1028 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * zerobits)1029 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1030 const int *jpeg_natural_order_start, int Sl,
1031 int Al, JCOEF *values, size_t *zerobits)
1032 {
1033 jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
1034 Sl, Al, values, zerobits);
1035 }
1036
1037 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1038 jsimd_can_encode_mcu_AC_refine_prepare(void)
1039 {
1040 init_simd();
1041
1042 if (DCTSIZE != 8)
1043 return 0;
1044 if (sizeof(JCOEF) != 2)
1045 return 0;
1046 if (SIZEOF_SIZE_T != 8)
1047 return 0;
1048
1049 if (simd_support & JSIMD_NEON)
1050 return 1;
1051
1052 return 0;
1053 }
1054
1055 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)1056 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1057 const int *jpeg_natural_order_start, int Sl,
1058 int Al, JCOEF *absvalues, size_t *bits)
1059 {
1060 return jsimd_encode_mcu_AC_refine_prepare_neon(block,
1061 jpeg_natural_order_start,
1062 Sl, Al, absvalues, bits);
1063 }
1064