• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * jsimd_arm64.c
3  *
4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6  * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, D. R. Commander.
7  * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8  * Copyright (C) 2020, Arm Limited.
9  *
10  * Based on the x86 SIMD extension for IJG JPEG library,
11  * Copyright (C) 1999-2006, MIYASAKA Masaru.
12  * For conditions of distribution and use, see copyright notice in jsimdext.inc
13  *
14  * This file contains the interface between the "normal" portions
15  * of the library and the SIMD implementations when running on a
16  * 64-bit Arm architecture.
17  */
18 
19 #define JPEG_INTERNALS
20 #include "../../../jinclude.h"
21 #include "../../../jpeglib.h"
22 #include "../../../jsimd.h"
23 #include "../../../jdct.h"
24 #include "../../../jsimddct.h"
25 #include "../../jsimd.h"
26 #include "jconfigint.h"
27 
28 #include <stdio.h>
29 #include <string.h>
30 #include <ctype.h>
31 
/* Bit flags kept in simd_features; each one marks the named instruction
   (ld3, st3, tbl) as fast enough to use on the running CPU. */
#define JSIMD_FASTLD3  1
#define JSIMD_FASTST3  2
#define JSIMD_FASTTBL  4

/* All bits set means init_simd() has not filled this in yet. */
static unsigned int simd_support = ~0U;
/* Non-zero while the SIMD Huffman encoder is allowed. */
static unsigned int simd_huffman = 1;
/* Every feature flag starts out set; CPU detection may clear some. */
static unsigned int simd_features =
  JSIMD_FASTLD3 | JSIMD_FASTST3 | JSIMD_FASTTBL;
40 
41 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
42 
43 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT  (1024 * 1024)
44 
/*
 * Test one line of /proc/cpuinfo.
 *
 * buffer: NUL-terminated line to examine.
 * field:  name the line must start with.
 * value:  token to look for after the field name.
 *
 * Returns 1 if 'buffer' starts with 'field' and 'value' occurs after it as a
 * separate word, i.e. preceded by whitespace (or located right at the start
 * of the remaining text) and followed by whitespace or the end of the string.
 * Returns 0 otherwise, including when 'value' is the empty string.
 */
LOCAL(int)
check_cpuinfo(char *buffer, const char *field, char *value)
{
  size_t field_len, value_len;
  char *start, *p;

  if (*value == 0)
    return 0;
  field_len = strlen(field);
  if (strncmp(buffer, field, field_len) != 0)
    return 0;
  buffer += field_len;
  /* <ctype.h> functions need an unsigned char value; a plain char may be
     negative. */
  while (isspace((unsigned char)*buffer))
    buffer++;

  value_len = strlen(value);
  start = buffer;
  /* Examine every occurrence of 'value'.  The left boundary is judged
     against the fixed start of the text, so a hit in the middle of a longer
     token (e.g. "10xd03") can never be mistaken for a whole word. */
  while ((p = strstr(buffer, value)) != NULL) {
    int left_ok = (p == start) || isspace((unsigned char)p[-1]);
    int right_ok = (p[value_len] == 0) ||
                   isspace((unsigned char)p[value_len]);

    if (left_ok && right_ok)
      return 1;
    buffer = p + 1;
  }
  return 0;
}
73 
74 LOCAL(int)
parse_proc_cpuinfo(int bufsize)75 parse_proc_cpuinfo(int bufsize)
76 {
77   char *buffer = (char *)malloc(bufsize);
78   FILE *fd;
79 
80   if (!buffer)
81     return 0;
82 
83   fd = fopen("/proc/cpuinfo", "r");
84   if (fd) {
85     while (fgets(buffer, bufsize, fd)) {
86       if (!strchr(buffer, '\n') && !feof(fd)) {
87         /* "impossible" happened - insufficient size of the buffer! */
88         fclose(fd);
89         free(buffer);
90         return 0;
91       }
92       if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
93           check_cpuinfo(buffer, "CPU part", "0xd07"))
94         /* The Cortex-A53 has a slow tbl implementation.  We can gain a few
95            percent speedup by disabling the use of that instruction.  The
96            speedup on Cortex-A57 is more subtle but still measurable. */
97         simd_features &= ~JSIMD_FASTTBL;
98       else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
99         /* The SIMD version of Huffman encoding is slower than the C version on
100            Cavium ThunderX.  Also, ld3 and st3 are abyssmally slow on that
101            CPU. */
102         simd_huffman = simd_features = 0;
103     }
104     fclose(fd);
105   }
106   free(buffer);
107   return 1;
108 }
109 
110 #endif
111 
112 /*
113  * Check what SIMD accelerations are supported.
114  *
115  * FIXME: This code is racy under a multi-threaded environment.
116  */
117 
118 /*
119  * Armv8 architectures support Neon extensions by default.
120  * It is no longer optional as it was with Armv7.
121  */
122 
123 
124 LOCAL(void)
init_simd(void)125 init_simd(void)
126 {
127 #ifndef NO_GETENV
128   char *env = NULL;
129 #endif
130 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
131   int bufsize = 1024; /* an initial guess for the line buffer size limit */
132 #endif
133 
134   if (simd_support != ~0U)
135     return;
136 
137   simd_support = 0;
138 
139   simd_support |= JSIMD_NEON;
140 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
141   while (!parse_proc_cpuinfo(bufsize)) {
142     bufsize *= 2;
143     if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
144       break;
145   }
146 #endif
147 
148 #ifndef NO_GETENV
149   /* Force different settings through environment variables */
150   env = getenv("JSIMD_FORCENEON");
151   if ((env != NULL) && (strcmp(env, "1") == 0))
152     simd_support = JSIMD_NEON;
153   env = getenv("JSIMD_FORCENONE");
154   if ((env != NULL) && (strcmp(env, "1") == 0))
155     simd_support = 0;
156   env = getenv("JSIMD_NOHUFFENC");
157   if ((env != NULL) && (strcmp(env, "1") == 0))
158     simd_huffman = 0;
159   env = getenv("JSIMD_FASTLD3");
160   if ((env != NULL) && (strcmp(env, "1") == 0))
161     simd_features |= JSIMD_FASTLD3;
162   if ((env != NULL) && (strcmp(env, "0") == 0))
163     simd_features &= ~JSIMD_FASTLD3;
164   env = getenv("JSIMD_FASTST3");
165   if ((env != NULL) && (strcmp(env, "1") == 0))
166     simd_features |= JSIMD_FASTST3;
167   if ((env != NULL) && (strcmp(env, "0") == 0))
168     simd_features &= ~JSIMD_FASTST3;
169 #endif
170 }
171 
172 GLOBAL(int)
jsimd_can_rgb_ycc(void)173 jsimd_can_rgb_ycc(void)
174 {
175   init_simd();
176 
177   /* The code is optimised for these values only */
178   if (BITS_IN_JSAMPLE != 8)
179     return 0;
180   if (sizeof(JDIMENSION) != 4)
181     return 0;
182   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
183     return 0;
184 
185   if (simd_support & JSIMD_NEON)
186     return 1;
187 
188   return 0;
189 }
190 
191 GLOBAL(int)
jsimd_can_rgb_gray(void)192 jsimd_can_rgb_gray(void)
193 {
194   init_simd();
195 
196   /* The code is optimised for these values only */
197   if (BITS_IN_JSAMPLE != 8)
198     return 0;
199   if (sizeof(JDIMENSION) != 4)
200     return 0;
201   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
202     return 0;
203 
204   if (simd_support & JSIMD_NEON)
205     return 1;
206 
207   return 0;
208 }
209 
210 GLOBAL(int)
jsimd_can_ycc_rgb(void)211 jsimd_can_ycc_rgb(void)
212 {
213   init_simd();
214 
215   /* The code is optimised for these values only */
216   if (BITS_IN_JSAMPLE != 8)
217     return 0;
218   if (sizeof(JDIMENSION) != 4)
219     return 0;
220   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
221     return 0;
222 
223   if (simd_support & JSIMD_NEON)
224     return 1;
225 
226   return 0;
227 }
228 
229 GLOBAL(int)
jsimd_can_ycc_rgb565(void)230 jsimd_can_ycc_rgb565(void)
231 {
232   init_simd();
233 
234   /* The code is optimised for these values only */
235   if (BITS_IN_JSAMPLE != 8)
236     return 0;
237   if (sizeof(JDIMENSION) != 4)
238     return 0;
239 
240   if (simd_support & JSIMD_NEON)
241     return 1;
242 
243   return 0;
244 }
245 
246 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)247 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
248                       JSAMPIMAGE output_buf, JDIMENSION output_row,
249                       int num_rows)
250 {
251   void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
252 
253   switch (cinfo->in_color_space) {
254   case JCS_EXT_RGB:
255 #ifndef NEON_INTRINSICS
256     if (simd_features & JSIMD_FASTLD3)
257 #endif
258       neonfct = jsimd_extrgb_ycc_convert_neon;
259 #ifndef NEON_INTRINSICS
260     else
261       neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
262 #endif
263     break;
264   case JCS_EXT_RGBX:
265   case JCS_EXT_RGBA:
266     neonfct = jsimd_extrgbx_ycc_convert_neon;
267     break;
268   case JCS_EXT_BGR:
269 #ifndef NEON_INTRINSICS
270     if (simd_features & JSIMD_FASTLD3)
271 #endif
272       neonfct = jsimd_extbgr_ycc_convert_neon;
273 #ifndef NEON_INTRINSICS
274     else
275       neonfct = jsimd_extbgr_ycc_convert_neon_slowld3;
276 #endif
277     break;
278   case JCS_EXT_BGRX:
279   case JCS_EXT_BGRA:
280     neonfct = jsimd_extbgrx_ycc_convert_neon;
281     break;
282   case JCS_EXT_XBGR:
283   case JCS_EXT_ABGR:
284     neonfct = jsimd_extxbgr_ycc_convert_neon;
285     break;
286   case JCS_EXT_XRGB:
287   case JCS_EXT_ARGB:
288     neonfct = jsimd_extxrgb_ycc_convert_neon;
289     break;
290   default:
291 #ifndef NEON_INTRINSICS
292     if (simd_features & JSIMD_FASTLD3)
293 #endif
294       neonfct = jsimd_extrgb_ycc_convert_neon;
295 #ifndef NEON_INTRINSICS
296     else
297       neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
298 #endif
299     break;
300   }
301 
302   neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
303 }
304 
305 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)306 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
307                        JSAMPIMAGE output_buf, JDIMENSION output_row,
308                        int num_rows)
309 {
310   void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
311 
312   switch (cinfo->in_color_space) {
313   case JCS_EXT_RGB:
314     neonfct = jsimd_extrgb_gray_convert_neon;
315     break;
316   case JCS_EXT_RGBX:
317   case JCS_EXT_RGBA:
318     neonfct = jsimd_extrgbx_gray_convert_neon;
319     break;
320   case JCS_EXT_BGR:
321     neonfct = jsimd_extbgr_gray_convert_neon;
322     break;
323   case JCS_EXT_BGRX:
324   case JCS_EXT_BGRA:
325     neonfct = jsimd_extbgrx_gray_convert_neon;
326     break;
327   case JCS_EXT_XBGR:
328   case JCS_EXT_ABGR:
329     neonfct = jsimd_extxbgr_gray_convert_neon;
330     break;
331   case JCS_EXT_XRGB:
332   case JCS_EXT_ARGB:
333     neonfct = jsimd_extxrgb_gray_convert_neon;
334     break;
335   default:
336     neonfct = jsimd_extrgb_gray_convert_neon;
337     break;
338   }
339 
340   neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
341 }
342 
343 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)344 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
345                       JDIMENSION input_row, JSAMPARRAY output_buf,
346                       int num_rows)
347 {
348   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
349 
350   switch (cinfo->out_color_space) {
351   case JCS_EXT_RGB:
352 #ifndef NEON_INTRINSICS
353     if (simd_features & JSIMD_FASTST3)
354 #endif
355       neonfct = jsimd_ycc_extrgb_convert_neon;
356 #ifndef NEON_INTRINSICS
357     else
358       neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
359 #endif
360     break;
361   case JCS_EXT_RGBX:
362   case JCS_EXT_RGBA:
363     neonfct = jsimd_ycc_extrgbx_convert_neon;
364     break;
365   case JCS_EXT_BGR:
366 #ifndef NEON_INTRINSICS
367     if (simd_features & JSIMD_FASTST3)
368 #endif
369       neonfct = jsimd_ycc_extbgr_convert_neon;
370 #ifndef NEON_INTRINSICS
371     else
372       neonfct = jsimd_ycc_extbgr_convert_neon_slowst3;
373 #endif
374     break;
375   case JCS_EXT_BGRX:
376   case JCS_EXT_BGRA:
377     neonfct = jsimd_ycc_extbgrx_convert_neon;
378     break;
379   case JCS_EXT_XBGR:
380   case JCS_EXT_ABGR:
381     neonfct = jsimd_ycc_extxbgr_convert_neon;
382     break;
383   case JCS_EXT_XRGB:
384   case JCS_EXT_ARGB:
385     neonfct = jsimd_ycc_extxrgb_convert_neon;
386     break;
387   default:
388 #ifndef NEON_INTRINSICS
389     if (simd_features & JSIMD_FASTST3)
390 #endif
391       neonfct = jsimd_ycc_extrgb_convert_neon;
392 #ifndef NEON_INTRINSICS
393     else
394       neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
395 #endif
396     break;
397   }
398 
399   neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
400 }
401 
402 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)403 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
404                          JDIMENSION input_row, JSAMPARRAY output_buf,
405                          int num_rows)
406 {
407   jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
408                                 output_buf, num_rows);
409 }
410 
411 GLOBAL(int)
jsimd_can_h2v2_downsample(void)412 jsimd_can_h2v2_downsample(void)
413 {
414   init_simd();
415 
416   /* The code is optimised for these values only */
417   if (BITS_IN_JSAMPLE != 8)
418     return 0;
419   if (DCTSIZE != 8)
420     return 0;
421   if (sizeof(JDIMENSION) != 4)
422     return 0;
423 
424   if (simd_support & JSIMD_NEON)
425     return 1;
426 
427   return 0;
428 }
429 
430 GLOBAL(int)
jsimd_can_h2v1_downsample(void)431 jsimd_can_h2v1_downsample(void)
432 {
433   init_simd();
434 
435   /* The code is optimised for these values only */
436   if (BITS_IN_JSAMPLE != 8)
437     return 0;
438   if (DCTSIZE != 8)
439     return 0;
440   if (sizeof(JDIMENSION) != 4)
441     return 0;
442 
443   if (simd_support & JSIMD_NEON)
444     return 1;
445 
446   return 0;
447 }
448 
449 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)450 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
451                       JSAMPARRAY input_data, JSAMPARRAY output_data)
452 {
453   jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
454                              compptr->v_samp_factor, compptr->width_in_blocks,
455                              input_data, output_data);
456 }
457 
458 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)459 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
460                       JSAMPARRAY input_data, JSAMPARRAY output_data)
461 {
462   jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
463                              compptr->v_samp_factor, compptr->width_in_blocks,
464                              input_data, output_data);
465 }
466 
467 GLOBAL(int)
jsimd_can_h2v2_upsample(void)468 jsimd_can_h2v2_upsample(void)
469 {
470   init_simd();
471 
472   /* The code is optimised for these values only */
473   if (BITS_IN_JSAMPLE != 8)
474     return 0;
475   if (sizeof(JDIMENSION) != 4)
476     return 0;
477 
478   if (simd_support & JSIMD_NEON)
479     return 1;
480 
481   return 0;
482 }
483 
484 GLOBAL(int)
jsimd_can_h2v1_upsample(void)485 jsimd_can_h2v1_upsample(void)
486 {
487   init_simd();
488 
489   /* The code is optimised for these values only */
490   if (BITS_IN_JSAMPLE != 8)
491     return 0;
492   if (sizeof(JDIMENSION) != 4)
493     return 0;
494   if (simd_support & JSIMD_NEON)
495     return 1;
496 
497   return 0;
498 }
499 
500 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)501 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
502                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
503 {
504   jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
505                            input_data, output_data_ptr);
506 }
507 
508 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)509 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
510                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
511 {
512   jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
513                            input_data, output_data_ptr);
514 }
515 
516 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)517 jsimd_can_h2v2_fancy_upsample(void)
518 {
519   init_simd();
520 
521   /* The code is optimised for these values only */
522   if (BITS_IN_JSAMPLE != 8)
523     return 0;
524   if (sizeof(JDIMENSION) != 4)
525     return 0;
526 
527   if (simd_support & JSIMD_NEON)
528     return 1;
529 
530   return 0;
531 }
532 
533 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)534 jsimd_can_h2v1_fancy_upsample(void)
535 {
536   init_simd();
537 
538   /* The code is optimised for these values only */
539   if (BITS_IN_JSAMPLE != 8)
540     return 0;
541   if (sizeof(JDIMENSION) != 4)
542     return 0;
543 
544   if (simd_support & JSIMD_NEON)
545     return 1;
546 
547   return 0;
548 }
549 
550 GLOBAL(int)
jsimd_can_h1v2_fancy_upsample(void)551 jsimd_can_h1v2_fancy_upsample(void)
552 {
553   init_simd();
554 
555   /* The code is optimised for these values only */
556   if (BITS_IN_JSAMPLE != 8)
557     return 0;
558   if (sizeof(JDIMENSION) != 4)
559     return 0;
560 
561   if (simd_support & JSIMD_NEON)
562     return 1;
563 
564   return 0;
565 }
566 
567 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)568 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
569                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
570 {
571   jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
572                                  compptr->downsampled_width, input_data,
573                                  output_data_ptr);
574 }
575 
576 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)577 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
578                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
579 {
580   jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
581                                  compptr->downsampled_width, input_data,
582                                  output_data_ptr);
583 }
584 
585 GLOBAL(void)
jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)586 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
587                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
588 {
589   jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
590                                  compptr->downsampled_width, input_data,
591                                  output_data_ptr);
592 }
593 
594 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)595 jsimd_can_h2v2_merged_upsample(void)
596 {
597   init_simd();
598 
599   /* The code is optimised for these values only */
600   if (BITS_IN_JSAMPLE != 8)
601     return 0;
602   if (sizeof(JDIMENSION) != 4)
603     return 0;
604 
605   if (simd_support & JSIMD_NEON)
606     return 1;
607 
608   return 0;
609 }
610 
611 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)612 jsimd_can_h2v1_merged_upsample(void)
613 {
614   init_simd();
615 
616   /* The code is optimised for these values only */
617   if (BITS_IN_JSAMPLE != 8)
618     return 0;
619   if (sizeof(JDIMENSION) != 4)
620     return 0;
621 
622   if (simd_support & JSIMD_NEON)
623     return 1;
624 
625   return 0;
626 }
627 
628 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)629 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
630                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
631 {
632   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
633 
634   switch (cinfo->out_color_space) {
635     case JCS_EXT_RGB:
636       neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
637       break;
638     case JCS_EXT_RGBX:
639     case JCS_EXT_RGBA:
640       neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
641       break;
642     case JCS_EXT_BGR:
643       neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
644       break;
645     case JCS_EXT_BGRX:
646     case JCS_EXT_BGRA:
647       neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
648       break;
649     case JCS_EXT_XBGR:
650     case JCS_EXT_ABGR:
651       neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
652       break;
653     case JCS_EXT_XRGB:
654     case JCS_EXT_ARGB:
655       neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
656       break;
657     default:
658       neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
659       break;
660   }
661 
662   neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
663 }
664 
665 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)666 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
667                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
668 {
669   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
670 
671   switch (cinfo->out_color_space) {
672     case JCS_EXT_RGB:
673       neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
674       break;
675     case JCS_EXT_RGBX:
676     case JCS_EXT_RGBA:
677       neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
678       break;
679     case JCS_EXT_BGR:
680       neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
681       break;
682     case JCS_EXT_BGRX:
683     case JCS_EXT_BGRA:
684       neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
685       break;
686     case JCS_EXT_XBGR:
687     case JCS_EXT_ABGR:
688       neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
689       break;
690     case JCS_EXT_XRGB:
691     case JCS_EXT_ARGB:
692       neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
693       break;
694     default:
695       neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
696       break;
697   }
698 
699   neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
700 }
701 
702 GLOBAL(int)
jsimd_can_convsamp(void)703 jsimd_can_convsamp(void)
704 {
705   init_simd();
706 
707   /* The code is optimised for these values only */
708   if (DCTSIZE != 8)
709     return 0;
710   if (BITS_IN_JSAMPLE != 8)
711     return 0;
712   if (sizeof(JDIMENSION) != 4)
713     return 0;
714   if (sizeof(DCTELEM) != 2)
715     return 0;
716 
717   if (simd_support & JSIMD_NEON)
718     return 1;
719 
720   return 0;
721 }
722 
723 GLOBAL(int)
jsimd_can_convsamp_float(void)724 jsimd_can_convsamp_float(void)
725 {
726   return 0;
727 }
728 
729 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)730 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
731                DCTELEM *workspace)
732 {
733   jsimd_convsamp_neon(sample_data, start_col, workspace);
734 }
735 
736 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)737 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
738                      FAST_FLOAT *workspace)
739 {
740 }
741 
742 GLOBAL(int)
jsimd_can_fdct_islow(void)743 jsimd_can_fdct_islow(void)
744 {
745   init_simd();
746 
747   /* The code is optimised for these values only */
748   if (DCTSIZE != 8)
749     return 0;
750   if (sizeof(DCTELEM) != 2)
751     return 0;
752 
753   if (simd_support & JSIMD_NEON)
754     return 1;
755 
756   return 0;
757 }
758 
759 GLOBAL(int)
jsimd_can_fdct_ifast(void)760 jsimd_can_fdct_ifast(void)
761 {
762   init_simd();
763 
764   /* The code is optimised for these values only */
765   if (DCTSIZE != 8)
766     return 0;
767   if (sizeof(DCTELEM) != 2)
768     return 0;
769 
770   if (simd_support & JSIMD_NEON)
771     return 1;
772 
773   return 0;
774 }
775 
776 GLOBAL(int)
jsimd_can_fdct_float(void)777 jsimd_can_fdct_float(void)
778 {
779   return 0;
780 }
781 
782 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)783 jsimd_fdct_islow(DCTELEM *data)
784 {
785   jsimd_fdct_islow_neon(data);
786 }
787 
788 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)789 jsimd_fdct_ifast(DCTELEM *data)
790 {
791   jsimd_fdct_ifast_neon(data);
792 }
793 
794 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)795 jsimd_fdct_float(FAST_FLOAT *data)
796 {
797 }
798 
799 GLOBAL(int)
jsimd_can_quantize(void)800 jsimd_can_quantize(void)
801 {
802   init_simd();
803 
804   /* The code is optimised for these values only */
805   if (DCTSIZE != 8)
806     return 0;
807   if (sizeof(JCOEF) != 2)
808     return 0;
809   if (sizeof(DCTELEM) != 2)
810     return 0;
811 
812   if (simd_support & JSIMD_NEON)
813     return 1;
814 
815   return 0;
816 }
817 
818 GLOBAL(int)
jsimd_can_quantize_float(void)819 jsimd_can_quantize_float(void)
820 {
821   return 0;
822 }
823 
824 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)825 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
826 {
827   jsimd_quantize_neon(coef_block, divisors, workspace);
828 }
829 
830 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)831 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
832                      FAST_FLOAT *workspace)
833 {
834 }
835 
836 GLOBAL(int)
jsimd_can_idct_2x2(void)837 jsimd_can_idct_2x2(void)
838 {
839   init_simd();
840 
841   /* The code is optimised for these values only */
842   if (DCTSIZE != 8)
843     return 0;
844   if (sizeof(JCOEF) != 2)
845     return 0;
846   if (BITS_IN_JSAMPLE != 8)
847     return 0;
848   if (sizeof(JDIMENSION) != 4)
849     return 0;
850   if (sizeof(ISLOW_MULT_TYPE) != 2)
851     return 0;
852 
853   if (simd_support & JSIMD_NEON)
854     return 1;
855 
856   return 0;
857 }
858 
859 GLOBAL(int)
jsimd_can_idct_4x4(void)860 jsimd_can_idct_4x4(void)
861 {
862   init_simd();
863 
864   /* The code is optimised for these values only */
865   if (DCTSIZE != 8)
866     return 0;
867   if (sizeof(JCOEF) != 2)
868     return 0;
869   if (BITS_IN_JSAMPLE != 8)
870     return 0;
871   if (sizeof(JDIMENSION) != 4)
872     return 0;
873   if (sizeof(ISLOW_MULT_TYPE) != 2)
874     return 0;
875 
876   if (simd_support & JSIMD_NEON)
877     return 1;
878 
879   return 0;
880 }
881 
882 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)883 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
884                JCOEFPTR coef_block, JSAMPARRAY output_buf,
885                JDIMENSION output_col)
886 {
887   jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
888 }
889 
890 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)891 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
892                JCOEFPTR coef_block, JSAMPARRAY output_buf,
893                JDIMENSION output_col)
894 {
895   jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
896 }
897 
898 GLOBAL(int)
jsimd_can_idct_islow(void)899 jsimd_can_idct_islow(void)
900 {
901   init_simd();
902 
903   /* The code is optimised for these values only */
904   if (DCTSIZE != 8)
905     return 0;
906   if (sizeof(JCOEF) != 2)
907     return 0;
908   if (BITS_IN_JSAMPLE != 8)
909     return 0;
910   if (sizeof(JDIMENSION) != 4)
911     return 0;
912   if (sizeof(ISLOW_MULT_TYPE) != 2)
913     return 0;
914 
915   if (simd_support & JSIMD_NEON)
916     return 1;
917 
918   return 0;
919 }
920 
921 GLOBAL(int)
jsimd_can_idct_ifast(void)922 jsimd_can_idct_ifast(void)
923 {
924   init_simd();
925 
926   /* The code is optimised for these values only */
927   if (DCTSIZE != 8)
928     return 0;
929   if (sizeof(JCOEF) != 2)
930     return 0;
931   if (BITS_IN_JSAMPLE != 8)
932     return 0;
933   if (sizeof(JDIMENSION) != 4)
934     return 0;
935   if (sizeof(IFAST_MULT_TYPE) != 2)
936     return 0;
937   if (IFAST_SCALE_BITS != 2)
938     return 0;
939 
940   if (simd_support & JSIMD_NEON)
941     return 1;
942 
943   return 0;
944 }
945 
946 GLOBAL(int)
jsimd_can_idct_float(void)947 jsimd_can_idct_float(void)
948 {
949   return 0;
950 }
951 
952 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)953 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
954                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
955                  JDIMENSION output_col)
956 {
957   jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
958                         output_col);
959 }
960 
961 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)962 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
963                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
964                  JDIMENSION output_col)
965 {
966   jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
967                         output_col);
968 }
969 
970 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)971 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
972                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
973                  JDIMENSION output_col)
974 {
975 }
976 
977 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)978 jsimd_can_huff_encode_one_block(void)
979 {
980   init_simd();
981 
982   if (DCTSIZE != 8)
983     return 0;
984   if (sizeof(JCOEF) != 2)
985     return 0;
986 
987   if (simd_support & JSIMD_NEON && simd_huffman)
988     return 1;
989 
990   return 0;
991 }
992 
993 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)994 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
995                             int last_dc_val, c_derived_tbl *dctbl,
996                             c_derived_tbl *actbl)
997 {
998 #ifndef NEON_INTRINSICS
999   if (simd_features & JSIMD_FASTTBL)
1000 #endif
1001     return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
1002                                             dctbl, actbl);
1003 #ifndef NEON_INTRINSICS
1004   else
1005     return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
1006                                                     last_dc_val, dctbl, actbl);
1007 #endif
1008 }
1009 
1010 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1011 jsimd_can_encode_mcu_AC_first_prepare(void)
1012 {
1013   init_simd();
1014 
1015   if (DCTSIZE != 8)
1016     return 0;
1017   if (sizeof(JCOEF) != 2)
1018     return 0;
1019   if (SIZEOF_SIZE_T != 8)
1020     return 0;
1021 
1022   if (simd_support & JSIMD_NEON)
1023     return 1;
1024 
1025   return 0;
1026 }
1027 
1028 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * zerobits)1029 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1030                                   const int *jpeg_natural_order_start, int Sl,
1031                                   int Al, JCOEF *values, size_t *zerobits)
1032 {
1033   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
1034                                          Sl, Al, values, zerobits);
1035 }
1036 
1037 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1038 jsimd_can_encode_mcu_AC_refine_prepare(void)
1039 {
1040   init_simd();
1041 
1042   if (DCTSIZE != 8)
1043     return 0;
1044   if (sizeof(JCOEF) != 2)
1045     return 0;
1046   if (SIZEOF_SIZE_T != 8)
1047     return 0;
1048 
1049   if (simd_support & JSIMD_NEON)
1050     return 1;
1051 
1052   return 0;
1053 }
1054 
1055 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)1056 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1057                                    const int *jpeg_natural_order_start, int Sl,
1058                                    int Al, JCOEF *absvalues, size_t *bits)
1059 {
1060   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
1061                                                  jpeg_natural_order_start,
1062                                                  Sl, Al, absvalues, bits);
1063 }
1064