• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * jsimd_arm64.c
3  *
4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6  * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
7  * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
8  * Copyright (C) 2020, Arm Limited.
9  *
10  * Based on the x86 SIMD extension for IJG JPEG library,
11  * Copyright (C) 1999-2006, MIYASAKA Masaru.
12  * For conditions of distribution and use, see copyright notice in jsimdext.inc
13  *
14  * This file contains the interface between the "normal" portions
15  * of the library and the SIMD implementations when running on a
16  * 64-bit Arm architecture.
17  */
18 
19 #define JPEG_INTERNALS
20 #include "../../../jinclude.h"
21 #include "../../../jpeglib.h"
22 #include "../../../jsimd.h"
23 #include "../../../jdct.h"
24 #include "../../../jsimddct.h"
25 #include "../../jsimd.h"
26 #include "jconfigint.h"
27 
28 #include <ctype.h>
29 
/* Bit flags stored in simd_features.  A set bit selects the regular Neon
   routine; a cleared bit selects the corresponding _slowld3 / _slowst3 /
   _slowtbl variant where one exists. */
#define JSIMD_FASTLD3  (1 << 0)
#define JSIMD_FASTST3  (1 << 1)
#define JSIMD_FASTTBL  (1 << 2)

/* All bits set means init_simd() has not run yet. */
static unsigned int simd_support = ~0U;
/* Non-zero allows the SIMD Huffman encoder to be reported as usable. */
static unsigned int simd_huffman = 1;
/* Every "fast" feature is assumed until parse_proc_cpuinfo() or an
   environment variable says otherwise. */
static unsigned int simd_features =
  JSIMD_FASTLD3 | JSIMD_FASTST3 | JSIMD_FASTTBL;
38 
39 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
40 
41 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT  (1024 * 1024)
42 
/*
 * Test whether a single line of text assigns 'value' to 'field'.
 *
 * buffer  NUL-terminated line to examine
 * field   key that the line must start with (e.g. "CPU part")
 * value   token to look for in the remainder of the line
 *
 * Returns 1 if 'buffer' begins with 'field' and 'value' occurs after it as a
 * word of its own (delimited by whitespace, the start of the remainder, or
 * the end of the string).  Returns 0 otherwise, including when 'value' is
 * empty.
 */
LOCAL(int)
check_cpuinfo(char *buffer, const char *field, char *value)
{
  size_t field_len, value_len;
  char *start, *p;

  if (*value == 0)
    return 0;
  field_len = strlen(field);
  if (strncmp(buffer, field, field_len) != 0)
    return 0;
  buffer += field_len;
  /* <ctype.h> classifiers are only defined for unsigned char values (or
     EOF), so plain char must be converted before the call. */
  while (isspace((unsigned char)*buffer))
    buffer++;

  /* Remember where the searchable text begins.  Word boundaries must be
     judged against this fixed position, not against the moving search
     cursor; otherwise a match embedded in a longer token (e.g. "a0xd03")
     is accepted once the cursor catches up with it. */
  start = buffer;
  value_len = strlen(value);

  while ((p = strstr(buffer, value)) != NULL) {
    int starts_word = (p == start) || isspace((unsigned char)*(p - 1));
    int ends_word = (p[value_len] == 0) ||
                    isspace((unsigned char)p[value_len]);

    if (starts_word && ends_word)
      return 1;
    /* Resume just past the start of the rejected candidate. */
    buffer = p + 1;
  }
  return 0;
}
71 
72 LOCAL(int)
parse_proc_cpuinfo(int bufsize)73 parse_proc_cpuinfo(int bufsize)
74 {
75   char *buffer = (char *)malloc(bufsize);
76   FILE *fd;
77 
78   if (!buffer)
79     return 0;
80 
81   fd = fopen("/proc/cpuinfo", "r");
82   if (fd) {
83     while (fgets(buffer, bufsize, fd)) {
84       if (!strchr(buffer, '\n') && !feof(fd)) {
85         /* "impossible" happened - insufficient size of the buffer! */
86         fclose(fd);
87         free(buffer);
88         return 0;
89       }
90       if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
91           check_cpuinfo(buffer, "CPU part", "0xd07"))
92         /* The Cortex-A53 has a slow tbl implementation.  We can gain a few
93            percent speedup by disabling the use of that instruction.  The
94            speedup on Cortex-A57 is more subtle but still measurable. */
95         simd_features &= ~JSIMD_FASTTBL;
96       else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
97         /* The SIMD version of Huffman encoding is slower than the C version on
98            Cavium ThunderX.  Also, ld3 and st3 are abyssmally slow on that
99            CPU. */
100         simd_huffman = simd_features = 0;
101     }
102     fclose(fd);
103   }
104   free(buffer);
105   return 1;
106 }
107 
108 #endif
109 
110 /*
111  * Check what SIMD accelerations are supported.
112  *
113  * FIXME: This code is racy under a multi-threaded environment.
114  */
115 
116 /*
117  * Armv8 architectures support Neon extensions by default.
118  * It is no longer optional as it was with Armv7.
119  */
120 
121 
122 LOCAL(void)
init_simd(void)123 init_simd(void)
124 {
125 #ifndef NO_GETENV
126   char env[2] = { 0 };
127 #endif
128 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
129   int bufsize = 1024; /* an initial guess for the line buffer size limit */
130 #endif
131 
132   if (simd_support != ~0U)
133     return;
134 
135   simd_support = 0;
136 
137   simd_support |= JSIMD_NEON;
138 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
139   while (!parse_proc_cpuinfo(bufsize)) {
140     bufsize *= 2;
141     if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
142       break;
143   }
144 #endif
145 
146 #ifndef NO_GETENV
147   /* Force different settings through environment variables */
148   if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
149     simd_support = JSIMD_NEON;
150   if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
151     simd_support = 0;
152   if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
153     simd_huffman = 0;
154   if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "1"))
155     simd_features |= JSIMD_FASTLD3;
156   if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "0"))
157     simd_features &= ~JSIMD_FASTLD3;
158   if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "1"))
159     simd_features |= JSIMD_FASTST3;
160   if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "0"))
161     simd_features &= ~JSIMD_FASTST3;
162 #endif
163 }
164 
165 GLOBAL(int)
jsimd_can_rgb_ycc(void)166 jsimd_can_rgb_ycc(void)
167 {
168   init_simd();
169 
170   /* The code is optimised for these values only */
171   if (BITS_IN_JSAMPLE != 8)
172     return 0;
173   if (sizeof(JDIMENSION) != 4)
174     return 0;
175   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
176     return 0;
177 
178   if (simd_support & JSIMD_NEON)
179     return 1;
180 
181   return 0;
182 }
183 
184 GLOBAL(int)
jsimd_can_rgb_gray(void)185 jsimd_can_rgb_gray(void)
186 {
187   init_simd();
188 
189   /* The code is optimised for these values only */
190   if (BITS_IN_JSAMPLE != 8)
191     return 0;
192   if (sizeof(JDIMENSION) != 4)
193     return 0;
194   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
195     return 0;
196 
197   if (simd_support & JSIMD_NEON)
198     return 1;
199 
200   return 0;
201 }
202 
203 GLOBAL(int)
jsimd_can_ycc_rgb(void)204 jsimd_can_ycc_rgb(void)
205 {
206   init_simd();
207 
208   /* The code is optimised for these values only */
209   if (BITS_IN_JSAMPLE != 8)
210     return 0;
211   if (sizeof(JDIMENSION) != 4)
212     return 0;
213   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
214     return 0;
215 
216   if (simd_support & JSIMD_NEON)
217     return 1;
218 
219   return 0;
220 }
221 
222 GLOBAL(int)
jsimd_can_ycc_rgb565(void)223 jsimd_can_ycc_rgb565(void)
224 {
225   init_simd();
226 
227   /* The code is optimised for these values only */
228   if (BITS_IN_JSAMPLE != 8)
229     return 0;
230   if (sizeof(JDIMENSION) != 4)
231     return 0;
232 
233   if (simd_support & JSIMD_NEON)
234     return 1;
235 
236   return 0;
237 }
238 
239 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)240 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
241                       JSAMPIMAGE output_buf, JDIMENSION output_row,
242                       int num_rows)
243 {
244   void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
245 
246   switch (cinfo->in_color_space) {
247   case JCS_EXT_RGB:
248 #ifndef NEON_INTRINSICS
249     if (simd_features & JSIMD_FASTLD3)
250 #endif
251       neonfct = jsimd_extrgb_ycc_convert_neon;
252 #ifndef NEON_INTRINSICS
253     else
254       neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
255 #endif
256     break;
257   case JCS_EXT_RGBX:
258   case JCS_EXT_RGBA:
259     neonfct = jsimd_extrgbx_ycc_convert_neon;
260     break;
261   case JCS_EXT_BGR:
262 #ifndef NEON_INTRINSICS
263     if (simd_features & JSIMD_FASTLD3)
264 #endif
265       neonfct = jsimd_extbgr_ycc_convert_neon;
266 #ifndef NEON_INTRINSICS
267     else
268       neonfct = jsimd_extbgr_ycc_convert_neon_slowld3;
269 #endif
270     break;
271   case JCS_EXT_BGRX:
272   case JCS_EXT_BGRA:
273     neonfct = jsimd_extbgrx_ycc_convert_neon;
274     break;
275   case JCS_EXT_XBGR:
276   case JCS_EXT_ABGR:
277     neonfct = jsimd_extxbgr_ycc_convert_neon;
278     break;
279   case JCS_EXT_XRGB:
280   case JCS_EXT_ARGB:
281     neonfct = jsimd_extxrgb_ycc_convert_neon;
282     break;
283   default:
284 #ifndef NEON_INTRINSICS
285     if (simd_features & JSIMD_FASTLD3)
286 #endif
287       neonfct = jsimd_extrgb_ycc_convert_neon;
288 #ifndef NEON_INTRINSICS
289     else
290       neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
291 #endif
292     break;
293   }
294 
295   neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
296 }
297 
298 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)299 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
300                        JSAMPIMAGE output_buf, JDIMENSION output_row,
301                        int num_rows)
302 {
303   void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
304 
305   switch (cinfo->in_color_space) {
306   case JCS_EXT_RGB:
307     neonfct = jsimd_extrgb_gray_convert_neon;
308     break;
309   case JCS_EXT_RGBX:
310   case JCS_EXT_RGBA:
311     neonfct = jsimd_extrgbx_gray_convert_neon;
312     break;
313   case JCS_EXT_BGR:
314     neonfct = jsimd_extbgr_gray_convert_neon;
315     break;
316   case JCS_EXT_BGRX:
317   case JCS_EXT_BGRA:
318     neonfct = jsimd_extbgrx_gray_convert_neon;
319     break;
320   case JCS_EXT_XBGR:
321   case JCS_EXT_ABGR:
322     neonfct = jsimd_extxbgr_gray_convert_neon;
323     break;
324   case JCS_EXT_XRGB:
325   case JCS_EXT_ARGB:
326     neonfct = jsimd_extxrgb_gray_convert_neon;
327     break;
328   default:
329     neonfct = jsimd_extrgb_gray_convert_neon;
330     break;
331   }
332 
333   neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
334 }
335 
336 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)337 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
338                       JDIMENSION input_row, JSAMPARRAY output_buf,
339                       int num_rows)
340 {
341   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
342 
343   switch (cinfo->out_color_space) {
344   case JCS_EXT_RGB:
345 #ifndef NEON_INTRINSICS
346     if (simd_features & JSIMD_FASTST3)
347 #endif
348       neonfct = jsimd_ycc_extrgb_convert_neon;
349 #ifndef NEON_INTRINSICS
350     else
351       neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
352 #endif
353     break;
354   case JCS_EXT_RGBX:
355   case JCS_EXT_RGBA:
356     neonfct = jsimd_ycc_extrgbx_convert_neon;
357     break;
358   case JCS_EXT_BGR:
359 #ifndef NEON_INTRINSICS
360     if (simd_features & JSIMD_FASTST3)
361 #endif
362       neonfct = jsimd_ycc_extbgr_convert_neon;
363 #ifndef NEON_INTRINSICS
364     else
365       neonfct = jsimd_ycc_extbgr_convert_neon_slowst3;
366 #endif
367     break;
368   case JCS_EXT_BGRX:
369   case JCS_EXT_BGRA:
370     neonfct = jsimd_ycc_extbgrx_convert_neon;
371     break;
372   case JCS_EXT_XBGR:
373   case JCS_EXT_ABGR:
374     neonfct = jsimd_ycc_extxbgr_convert_neon;
375     break;
376   case JCS_EXT_XRGB:
377   case JCS_EXT_ARGB:
378     neonfct = jsimd_ycc_extxrgb_convert_neon;
379     break;
380   default:
381 #ifndef NEON_INTRINSICS
382     if (simd_features & JSIMD_FASTST3)
383 #endif
384       neonfct = jsimd_ycc_extrgb_convert_neon;
385 #ifndef NEON_INTRINSICS
386     else
387       neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
388 #endif
389     break;
390   }
391 
392   neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
393 }
394 
395 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)396 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
397                          JDIMENSION input_row, JSAMPARRAY output_buf,
398                          int num_rows)
399 {
400   jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
401                                 output_buf, num_rows);
402 }
403 
404 GLOBAL(int)
jsimd_can_h2v2_downsample(void)405 jsimd_can_h2v2_downsample(void)
406 {
407   init_simd();
408 
409   /* The code is optimised for these values only */
410   if (BITS_IN_JSAMPLE != 8)
411     return 0;
412   if (DCTSIZE != 8)
413     return 0;
414   if (sizeof(JDIMENSION) != 4)
415     return 0;
416 
417   if (simd_support & JSIMD_NEON)
418     return 1;
419 
420   return 0;
421 }
422 
423 GLOBAL(int)
jsimd_can_h2v1_downsample(void)424 jsimd_can_h2v1_downsample(void)
425 {
426   init_simd();
427 
428   /* The code is optimised for these values only */
429   if (BITS_IN_JSAMPLE != 8)
430     return 0;
431   if (DCTSIZE != 8)
432     return 0;
433   if (sizeof(JDIMENSION) != 4)
434     return 0;
435 
436   if (simd_support & JSIMD_NEON)
437     return 1;
438 
439   return 0;
440 }
441 
442 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)443 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
444                       JSAMPARRAY input_data, JSAMPARRAY output_data)
445 {
446   jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
447                              compptr->v_samp_factor, compptr->width_in_blocks,
448                              input_data, output_data);
449 }
450 
451 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)452 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
453                       JSAMPARRAY input_data, JSAMPARRAY output_data)
454 {
455   jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
456                              compptr->v_samp_factor, compptr->width_in_blocks,
457                              input_data, output_data);
458 }
459 
460 GLOBAL(int)
jsimd_can_h2v2_upsample(void)461 jsimd_can_h2v2_upsample(void)
462 {
463   init_simd();
464 
465   /* The code is optimised for these values only */
466   if (BITS_IN_JSAMPLE != 8)
467     return 0;
468   if (sizeof(JDIMENSION) != 4)
469     return 0;
470 
471   if (simd_support & JSIMD_NEON)
472     return 1;
473 
474   return 0;
475 }
476 
477 GLOBAL(int)
jsimd_can_h2v1_upsample(void)478 jsimd_can_h2v1_upsample(void)
479 {
480   init_simd();
481 
482   /* The code is optimised for these values only */
483   if (BITS_IN_JSAMPLE != 8)
484     return 0;
485   if (sizeof(JDIMENSION) != 4)
486     return 0;
487   if (simd_support & JSIMD_NEON)
488     return 1;
489 
490   return 0;
491 }
492 
493 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)494 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
495                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
496 {
497   jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
498                            input_data, output_data_ptr);
499 }
500 
501 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)502 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
503                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
504 {
505   jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
506                            input_data, output_data_ptr);
507 }
508 
509 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)510 jsimd_can_h2v2_fancy_upsample(void)
511 {
512   init_simd();
513 
514   /* The code is optimised for these values only */
515   if (BITS_IN_JSAMPLE != 8)
516     return 0;
517   if (sizeof(JDIMENSION) != 4)
518     return 0;
519 
520   if (simd_support & JSIMD_NEON)
521     return 1;
522 
523   return 0;
524 }
525 
526 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)527 jsimd_can_h2v1_fancy_upsample(void)
528 {
529   init_simd();
530 
531   /* The code is optimised for these values only */
532   if (BITS_IN_JSAMPLE != 8)
533     return 0;
534   if (sizeof(JDIMENSION) != 4)
535     return 0;
536 
537   if (simd_support & JSIMD_NEON)
538     return 1;
539 
540   return 0;
541 }
542 
543 GLOBAL(int)
jsimd_can_h1v2_fancy_upsample(void)544 jsimd_can_h1v2_fancy_upsample(void)
545 {
546   init_simd();
547 
548   /* The code is optimised for these values only */
549   if (BITS_IN_JSAMPLE != 8)
550     return 0;
551   if (sizeof(JDIMENSION) != 4)
552     return 0;
553 
554   if (simd_support & JSIMD_NEON)
555     return 1;
556 
557   return 0;
558 }
559 
560 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)561 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
562                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
563 {
564   jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
565                                  compptr->downsampled_width, input_data,
566                                  output_data_ptr);
567 }
568 
569 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)570 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
571                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
572 {
573   jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
574                                  compptr->downsampled_width, input_data,
575                                  output_data_ptr);
576 }
577 
578 GLOBAL(void)
jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)579 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
580                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
581 {
582   jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
583                                  compptr->downsampled_width, input_data,
584                                  output_data_ptr);
585 }
586 
587 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)588 jsimd_can_h2v2_merged_upsample(void)
589 {
590   init_simd();
591 
592   /* The code is optimised for these values only */
593   if (BITS_IN_JSAMPLE != 8)
594     return 0;
595   if (sizeof(JDIMENSION) != 4)
596     return 0;
597 
598   if (simd_support & JSIMD_NEON)
599     return 1;
600 
601   return 0;
602 }
603 
604 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)605 jsimd_can_h2v1_merged_upsample(void)
606 {
607   init_simd();
608 
609   /* The code is optimised for these values only */
610   if (BITS_IN_JSAMPLE != 8)
611     return 0;
612   if (sizeof(JDIMENSION) != 4)
613     return 0;
614 
615   if (simd_support & JSIMD_NEON)
616     return 1;
617 
618   return 0;
619 }
620 
621 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)622 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
623                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
624 {
625   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
626 
627   switch (cinfo->out_color_space) {
628     case JCS_EXT_RGB:
629       neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
630       break;
631     case JCS_EXT_RGBX:
632     case JCS_EXT_RGBA:
633       neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
634       break;
635     case JCS_EXT_BGR:
636       neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
637       break;
638     case JCS_EXT_BGRX:
639     case JCS_EXT_BGRA:
640       neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
641       break;
642     case JCS_EXT_XBGR:
643     case JCS_EXT_ABGR:
644       neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
645       break;
646     case JCS_EXT_XRGB:
647     case JCS_EXT_ARGB:
648       neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
649       break;
650     default:
651       neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
652       break;
653   }
654 
655   neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
656 }
657 
658 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)659 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
660                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
661 {
662   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
663 
664   switch (cinfo->out_color_space) {
665     case JCS_EXT_RGB:
666       neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
667       break;
668     case JCS_EXT_RGBX:
669     case JCS_EXT_RGBA:
670       neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
671       break;
672     case JCS_EXT_BGR:
673       neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
674       break;
675     case JCS_EXT_BGRX:
676     case JCS_EXT_BGRA:
677       neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
678       break;
679     case JCS_EXT_XBGR:
680     case JCS_EXT_ABGR:
681       neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
682       break;
683     case JCS_EXT_XRGB:
684     case JCS_EXT_ARGB:
685       neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
686       break;
687     default:
688       neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
689       break;
690   }
691 
692   neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
693 }
694 
695 GLOBAL(int)
jsimd_can_convsamp(void)696 jsimd_can_convsamp(void)
697 {
698   init_simd();
699 
700   /* The code is optimised for these values only */
701   if (DCTSIZE != 8)
702     return 0;
703   if (BITS_IN_JSAMPLE != 8)
704     return 0;
705   if (sizeof(JDIMENSION) != 4)
706     return 0;
707   if (sizeof(DCTELEM) != 2)
708     return 0;
709 
710   if (simd_support & JSIMD_NEON)
711     return 1;
712 
713   return 0;
714 }
715 
/*
 * Capability check for the floating-point convsamp SIMD path.
 * Always returns 0: jsimd_convsamp_float() in this file is an empty stub.
 */
GLOBAL(int)
jsimd_can_convsamp_float(void)
{
  return 0;
}
721 
722 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)723 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
724                DCTELEM *workspace)
725 {
726   jsimd_convsamp_neon(sample_data, start_col, workspace);
727 }
728 
729 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)730 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
731                      FAST_FLOAT *workspace)
732 {
733 }
734 
735 GLOBAL(int)
jsimd_can_fdct_islow(void)736 jsimd_can_fdct_islow(void)
737 {
738   init_simd();
739 
740   /* The code is optimised for these values only */
741   if (DCTSIZE != 8)
742     return 0;
743   if (sizeof(DCTELEM) != 2)
744     return 0;
745 
746   if (simd_support & JSIMD_NEON)
747     return 1;
748 
749   return 0;
750 }
751 
752 GLOBAL(int)
jsimd_can_fdct_ifast(void)753 jsimd_can_fdct_ifast(void)
754 {
755   init_simd();
756 
757   /* The code is optimised for these values only */
758   if (DCTSIZE != 8)
759     return 0;
760   if (sizeof(DCTELEM) != 2)
761     return 0;
762 
763   if (simd_support & JSIMD_NEON)
764     return 1;
765 
766   return 0;
767 }
768 
/*
 * Capability check for the floating-point forward DCT SIMD path.
 * Always returns 0: jsimd_fdct_float() in this file is an empty stub.
 */
GLOBAL(int)
jsimd_can_fdct_float(void)
{
  return 0;
}
774 
775 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)776 jsimd_fdct_islow(DCTELEM *data)
777 {
778   jsimd_fdct_islow_neon(data);
779 }
780 
781 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)782 jsimd_fdct_ifast(DCTELEM *data)
783 {
784   jsimd_fdct_ifast_neon(data);
785 }
786 
787 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)788 jsimd_fdct_float(FAST_FLOAT *data)
789 {
790 }
791 
792 GLOBAL(int)
jsimd_can_quantize(void)793 jsimd_can_quantize(void)
794 {
795   init_simd();
796 
797   /* The code is optimised for these values only */
798   if (DCTSIZE != 8)
799     return 0;
800   if (sizeof(JCOEF) != 2)
801     return 0;
802   if (sizeof(DCTELEM) != 2)
803     return 0;
804 
805   if (simd_support & JSIMD_NEON)
806     return 1;
807 
808   return 0;
809 }
810 
/*
 * Capability check for the floating-point quantize SIMD path.
 * Always returns 0: jsimd_quantize_float() in this file is an empty stub.
 */
GLOBAL(int)
jsimd_can_quantize_float(void)
{
  return 0;
}
816 
817 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)818 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
819 {
820   jsimd_quantize_neon(coef_block, divisors, workspace);
821 }
822 
823 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)824 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
825                      FAST_FLOAT *workspace)
826 {
827 }
828 
829 GLOBAL(int)
jsimd_can_idct_2x2(void)830 jsimd_can_idct_2x2(void)
831 {
832   init_simd();
833 
834   /* The code is optimised for these values only */
835   if (DCTSIZE != 8)
836     return 0;
837   if (sizeof(JCOEF) != 2)
838     return 0;
839   if (BITS_IN_JSAMPLE != 8)
840     return 0;
841   if (sizeof(JDIMENSION) != 4)
842     return 0;
843   if (sizeof(ISLOW_MULT_TYPE) != 2)
844     return 0;
845 
846   if (simd_support & JSIMD_NEON)
847     return 1;
848 
849   return 0;
850 }
851 
852 GLOBAL(int)
jsimd_can_idct_4x4(void)853 jsimd_can_idct_4x4(void)
854 {
855   init_simd();
856 
857   /* The code is optimised for these values only */
858   if (DCTSIZE != 8)
859     return 0;
860   if (sizeof(JCOEF) != 2)
861     return 0;
862   if (BITS_IN_JSAMPLE != 8)
863     return 0;
864   if (sizeof(JDIMENSION) != 4)
865     return 0;
866   if (sizeof(ISLOW_MULT_TYPE) != 2)
867     return 0;
868 
869   if (simd_support & JSIMD_NEON)
870     return 1;
871 
872   return 0;
873 }
874 
875 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)876 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
877                JCOEFPTR coef_block, JSAMPARRAY output_buf,
878                JDIMENSION output_col)
879 {
880   jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
881 }
882 
883 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)884 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
885                JCOEFPTR coef_block, JSAMPARRAY output_buf,
886                JDIMENSION output_col)
887 {
888   jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
889 }
890 
891 GLOBAL(int)
jsimd_can_idct_islow(void)892 jsimd_can_idct_islow(void)
893 {
894   init_simd();
895 
896   /* The code is optimised for these values only */
897   if (DCTSIZE != 8)
898     return 0;
899   if (sizeof(JCOEF) != 2)
900     return 0;
901   if (BITS_IN_JSAMPLE != 8)
902     return 0;
903   if (sizeof(JDIMENSION) != 4)
904     return 0;
905   if (sizeof(ISLOW_MULT_TYPE) != 2)
906     return 0;
907 
908   if (simd_support & JSIMD_NEON)
909     return 1;
910 
911   return 0;
912 }
913 
914 GLOBAL(int)
jsimd_can_idct_ifast(void)915 jsimd_can_idct_ifast(void)
916 {
917   init_simd();
918 
919   /* The code is optimised for these values only */
920   if (DCTSIZE != 8)
921     return 0;
922   if (sizeof(JCOEF) != 2)
923     return 0;
924   if (BITS_IN_JSAMPLE != 8)
925     return 0;
926   if (sizeof(JDIMENSION) != 4)
927     return 0;
928   if (sizeof(IFAST_MULT_TYPE) != 2)
929     return 0;
930   if (IFAST_SCALE_BITS != 2)
931     return 0;
932 
933   if (simd_support & JSIMD_NEON)
934     return 1;
935 
936   return 0;
937 }
938 
/*
 * Capability check for the floating-point inverse DCT SIMD path.
 * Always returns 0: jsimd_idct_float() in this file is an empty stub.
 */
GLOBAL(int)
jsimd_can_idct_float(void)
{
  return 0;
}
944 
945 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)946 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
947                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
948                  JDIMENSION output_col)
949 {
950   jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
951                         output_col);
952 }
953 
954 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)955 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
956                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
957                  JDIMENSION output_col)
958 {
959   jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
960                         output_col);
961 }
962 
963 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)964 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
965                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
966                  JDIMENSION output_col)
967 {
968 }
969 
970 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)971 jsimd_can_huff_encode_one_block(void)
972 {
973   init_simd();
974 
975   if (DCTSIZE != 8)
976     return 0;
977   if (sizeof(JCOEF) != 2)
978     return 0;
979 
980   if (simd_support & JSIMD_NEON && simd_huffman)
981     return 1;
982 
983   return 0;
984 }
985 
986 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)987 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
988                             int last_dc_val, c_derived_tbl *dctbl,
989                             c_derived_tbl *actbl)
990 {
991 #ifndef NEON_INTRINSICS
992   if (simd_features & JSIMD_FASTTBL)
993 #endif
994     return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
995                                             dctbl, actbl);
996 #ifndef NEON_INTRINSICS
997   else
998     return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
999                                                     last_dc_val, dctbl, actbl);
1000 #endif
1001 }
1002 
1003 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1004 jsimd_can_encode_mcu_AC_first_prepare(void)
1005 {
1006   init_simd();
1007 
1008   if (DCTSIZE != 8)
1009     return 0;
1010   if (sizeof(JCOEF) != 2)
1011     return 0;
1012   if (SIZEOF_SIZE_T != 8)
1013     return 0;
1014 
1015   if (simd_support & JSIMD_NEON)
1016     return 1;
1017 
1018   return 0;
1019 }
1020 
1021 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * zerobits)1022 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1023                                   const int *jpeg_natural_order_start, int Sl,
1024                                   int Al, JCOEF *values, size_t *zerobits)
1025 {
1026   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
1027                                          Sl, Al, values, zerobits);
1028 }
1029 
1030 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1031 jsimd_can_encode_mcu_AC_refine_prepare(void)
1032 {
1033   init_simd();
1034 
1035   if (DCTSIZE != 8)
1036     return 0;
1037   if (sizeof(JCOEF) != 2)
1038     return 0;
1039   if (SIZEOF_SIZE_T != 8)
1040     return 0;
1041 
1042   if (simd_support & JSIMD_NEON)
1043     return 1;
1044 
1045   return 0;
1046 }
1047 
1048 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)1049 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1050                                    const int *jpeg_natural_order_start, int Sl,
1051                                    int Al, JCOEF *absvalues, size_t *bits)
1052 {
1053   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
1054                                                  jpeg_natural_order_start,
1055                                                  Sl, Al, absvalues, bits);
1056 }
1057