/*
 * jsimd_arm64.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009-2011, 2013-2014 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on a
 * 64-bit ARM architecture.
 */
15
16 #define JPEG_INTERNALS
17 #include "../jinclude.h"
18 #include "../jpeglib.h"
19 #include "../jsimd.h"
20 #include "../jdct.h"
21 #include "../jsimddct.h"
22 #include "jsimd.h"
23
24 #include <stdio.h>
25 #include <string.h>
26 #include <ctype.h>
27
/* Cached SIMD capability mask; all-ones means init_simd() has not run yet. */
static unsigned int simd_support = ~0U;
29
/*
 * Check what SIMD accelerations are supported.
 *
 * FIXME: This code is racy in a multi-threaded environment.
 */

/*
 * ARMv8 architectures support NEON extensions by default.
 * It is no longer optional as it was with ARMv7.
 */
40
41
42 LOCAL(void)
init_simd(void)43 init_simd (void)
44 {
45 char *env = NULL;
46
47 if (simd_support != ~0U)
48 return;
49
50 simd_support = 0;
51
52 simd_support |= JSIMD_ARM_NEON;
53
54 /* Force different settings through environment variables */
55 env = getenv("JSIMD_FORCENEON");
56 if ((env != NULL) && (strcmp(env, "1") == 0))
57 simd_support &= JSIMD_ARM_NEON;
58 env = getenv("JSIMD_FORCENONE");
59 if ((env != NULL) && (strcmp(env, "1") == 0))
60 simd_support = 0;
61 }
62
63 GLOBAL(int)
jsimd_can_rgb_ycc(void)64 jsimd_can_rgb_ycc (void)
65 {
66 init_simd();
67
68 return 0;
69 }
70
71 GLOBAL(int)
jsimd_can_rgb_gray(void)72 jsimd_can_rgb_gray (void)
73 {
74 init_simd();
75
76 return 0;
77 }
78
79 GLOBAL(int)
jsimd_can_ycc_rgb(void)80 jsimd_can_ycc_rgb (void)
81 {
82 init_simd();
83
84 /* The code is optimised for these values only */
85 if (BITS_IN_JSAMPLE != 8)
86 return 0;
87 if (sizeof(JDIMENSION) != 4)
88 return 0;
89 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
90 return 0;
91
92 if (simd_support & JSIMD_ARM_NEON)
93 return 1;
94
95 return 0;
96 }
97
98 GLOBAL(int)
jsimd_can_ycc_rgb565(void)99 jsimd_can_ycc_rgb565 (void)
100 {
101 init_simd();
102
103 /* The code is optimised for these values only */
104 if (BITS_IN_JSAMPLE != 8)
105 return 0;
106 if (sizeof(JDIMENSION) != 4)
107 return 0;
108
109 if (simd_support & JSIMD_ARM_NEON)
110 return 1;
111
112 return 0;
113 }
114
115 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)116 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
117 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
118 JDIMENSION output_row, int num_rows)
119 {
120 }
121
122 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)123 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
124 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
125 JDIMENSION output_row, int num_rows)
126 {
127 }
128
129 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)130 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
131 JSAMPIMAGE input_buf, JDIMENSION input_row,
132 JSAMPARRAY output_buf, int num_rows)
133 {
134 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
135
136 switch(cinfo->out_color_space) {
137 case JCS_EXT_RGB:
138 neonfct=jsimd_ycc_extrgb_convert_neon;
139 break;
140 case JCS_EXT_RGBX:
141 case JCS_EXT_RGBA:
142 neonfct=jsimd_ycc_extrgbx_convert_neon;
143 break;
144 case JCS_EXT_BGR:
145 neonfct=jsimd_ycc_extbgr_convert_neon;
146 break;
147 case JCS_EXT_BGRX:
148 case JCS_EXT_BGRA:
149 neonfct=jsimd_ycc_extbgrx_convert_neon;
150 break;
151 case JCS_EXT_XBGR:
152 case JCS_EXT_ABGR:
153 neonfct=jsimd_ycc_extxbgr_convert_neon;
154 break;
155 case JCS_EXT_XRGB:
156 case JCS_EXT_ARGB:
157 neonfct=jsimd_ycc_extxrgb_convert_neon;
158 break;
159 default:
160 neonfct=jsimd_ycc_extrgb_convert_neon;
161 break;
162 }
163
164 if (simd_support & JSIMD_ARM_NEON)
165 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
166 }
167
168 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)169 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
170 JSAMPIMAGE input_buf, JDIMENSION input_row,
171 JSAMPARRAY output_buf, int num_rows)
172 {
173 if (simd_support & JSIMD_ARM_NEON)
174 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
175 output_buf, num_rows);
176 }
177
178 GLOBAL(int)
jsimd_can_h2v2_downsample(void)179 jsimd_can_h2v2_downsample (void)
180 {
181 init_simd();
182
183 return 0;
184 }
185
186 GLOBAL(int)
jsimd_can_h2v1_downsample(void)187 jsimd_can_h2v1_downsample (void)
188 {
189 init_simd();
190
191 return 0;
192 }
193
194 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)195 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
196 JSAMPARRAY input_data, JSAMPARRAY output_data)
197 {
198 }
199
200 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)201 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
202 JSAMPARRAY input_data, JSAMPARRAY output_data)
203 {
204 }
205
206 GLOBAL(int)
jsimd_can_h2v2_upsample(void)207 jsimd_can_h2v2_upsample (void)
208 {
209 init_simd();
210
211 return 0;
212 }
213
214 GLOBAL(int)
jsimd_can_h2v1_upsample(void)215 jsimd_can_h2v1_upsample (void)
216 {
217 init_simd();
218
219 return 0;
220 }
221
222 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)223 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
224 jpeg_component_info * compptr,
225 JSAMPARRAY input_data,
226 JSAMPARRAY * output_data_ptr)
227 {
228 }
229
230 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)231 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
232 jpeg_component_info * compptr,
233 JSAMPARRAY input_data,
234 JSAMPARRAY * output_data_ptr)
235 {
236 }
237
238 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)239 jsimd_can_h2v2_fancy_upsample (void)
240 {
241 init_simd();
242
243 return 0;
244 }
245
246 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)247 jsimd_can_h2v1_fancy_upsample (void)
248 {
249 init_simd();
250
251 return 0;
252 }
253
254 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)255 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
256 jpeg_component_info * compptr,
257 JSAMPARRAY input_data,
258 JSAMPARRAY * output_data_ptr)
259 {
260 }
261
262 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)263 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
264 jpeg_component_info * compptr,
265 JSAMPARRAY input_data,
266 JSAMPARRAY * output_data_ptr)
267 {
268 }
269
270 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)271 jsimd_can_h2v2_merged_upsample (void)
272 {
273 init_simd();
274
275 return 0;
276 }
277
278 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)279 jsimd_can_h2v1_merged_upsample (void)
280 {
281 init_simd();
282
283 return 0;
284 }
285
286 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)287 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
288 JSAMPIMAGE input_buf,
289 JDIMENSION in_row_group_ctr,
290 JSAMPARRAY output_buf)
291 {
292 }
293
294 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)295 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
296 JSAMPIMAGE input_buf,
297 JDIMENSION in_row_group_ctr,
298 JSAMPARRAY output_buf)
299 {
300 }
301
302 GLOBAL(int)
jsimd_can_convsamp(void)303 jsimd_can_convsamp (void)
304 {
305 init_simd();
306
307 return 0;
308 }
309
310 GLOBAL(int)
jsimd_can_convsamp_float(void)311 jsimd_can_convsamp_float (void)
312 {
313 init_simd();
314
315 return 0;
316 }
317
318 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)319 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
320 DCTELEM * workspace)
321 {
322 }
323
324 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)325 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
326 FAST_FLOAT * workspace)
327 {
328 }
329
330 GLOBAL(int)
jsimd_can_fdct_islow(void)331 jsimd_can_fdct_islow (void)
332 {
333 init_simd();
334
335 return 0;
336 }
337
338 GLOBAL(int)
jsimd_can_fdct_ifast(void)339 jsimd_can_fdct_ifast (void)
340 {
341 init_simd();
342
343 return 0;
344 }
345
346 GLOBAL(int)
jsimd_can_fdct_float(void)347 jsimd_can_fdct_float (void)
348 {
349 init_simd();
350
351 return 0;
352 }
353
354 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)355 jsimd_fdct_islow (DCTELEM * data)
356 {
357 }
358
359 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)360 jsimd_fdct_ifast (DCTELEM * data)
361 {
362 }
363
364 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)365 jsimd_fdct_float (FAST_FLOAT * data)
366 {
367 }
368
369 GLOBAL(int)
jsimd_can_quantize(void)370 jsimd_can_quantize (void)
371 {
372 init_simd();
373
374 return 0;
375 }
376
377 GLOBAL(int)
jsimd_can_quantize_float(void)378 jsimd_can_quantize_float (void)
379 {
380 init_simd();
381
382 return 0;
383 }
384
385 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)386 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
387 DCTELEM * workspace)
388 {
389 }
390
391 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)392 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
393 FAST_FLOAT * workspace)
394 {
395 }
396
397 GLOBAL(int)
jsimd_can_idct_2x2(void)398 jsimd_can_idct_2x2 (void)
399 {
400 init_simd();
401
402 /* The code is optimised for these values only */
403 if (DCTSIZE != 8)
404 return 0;
405 if (sizeof(JCOEF) != 2)
406 return 0;
407 if (BITS_IN_JSAMPLE != 8)
408 return 0;
409 if (sizeof(JDIMENSION) != 4)
410 return 0;
411 if (sizeof(ISLOW_MULT_TYPE) != 2)
412 return 0;
413
414 if (simd_support & JSIMD_ARM_NEON)
415 return 1;
416
417 return 0;
418 }
419
420 GLOBAL(int)
jsimd_can_idct_4x4(void)421 jsimd_can_idct_4x4 (void)
422 {
423 init_simd();
424
425 /* The code is optimised for these values only */
426 if (DCTSIZE != 8)
427 return 0;
428 if (sizeof(JCOEF) != 2)
429 return 0;
430 if (BITS_IN_JSAMPLE != 8)
431 return 0;
432 if (sizeof(JDIMENSION) != 4)
433 return 0;
434 if (sizeof(ISLOW_MULT_TYPE) != 2)
435 return 0;
436
437 if (simd_support & JSIMD_ARM_NEON)
438 return 1;
439
440 return 0;
441 }
442
443 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)444 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
445 JCOEFPTR coef_block, JSAMPARRAY output_buf,
446 JDIMENSION output_col)
447 {
448 if (simd_support & JSIMD_ARM_NEON)
449 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
450 output_col);
451 }
452
453 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)454 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
455 JCOEFPTR coef_block, JSAMPARRAY output_buf,
456 JDIMENSION output_col)
457 {
458 if (simd_support & JSIMD_ARM_NEON)
459 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
460 output_col);
461 }
462
463 GLOBAL(int)
jsimd_can_idct_islow(void)464 jsimd_can_idct_islow (void)
465 {
466 init_simd();
467
468 /* The code is optimised for these values only */
469 if (DCTSIZE != 8)
470 return 0;
471 if (sizeof(JCOEF) != 2)
472 return 0;
473 if (BITS_IN_JSAMPLE != 8)
474 return 0;
475 if (sizeof(JDIMENSION) != 4)
476 return 0;
477 if (sizeof(ISLOW_MULT_TYPE) != 2)
478 return 0;
479
480 if (simd_support & JSIMD_ARM_NEON)
481 return 1;
482
483 return 0;
484 }
485
486 GLOBAL(int)
jsimd_can_idct_ifast(void)487 jsimd_can_idct_ifast (void)
488 {
489 init_simd();
490
491 /* The code is optimised for these values only */
492 if (DCTSIZE != 8)
493 return 0;
494 if (sizeof(JCOEF) != 2)
495 return 0;
496 if (BITS_IN_JSAMPLE != 8)
497 return 0;
498 if (sizeof(JDIMENSION) != 4)
499 return 0;
500 if (sizeof(IFAST_MULT_TYPE) != 2)
501 return 0;
502 if (IFAST_SCALE_BITS != 2)
503 return 0;
504
505 if (simd_support & JSIMD_ARM_NEON)
506 return 1;
507
508 return 0;
509 }
510
511 GLOBAL(int)
jsimd_can_idct_float(void)512 jsimd_can_idct_float (void)
513 {
514 init_simd();
515
516 return 0;
517 }
518
519 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)520 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
521 JCOEFPTR coef_block, JSAMPARRAY output_buf,
522 JDIMENSION output_col)
523 {
524 if (simd_support & JSIMD_ARM_NEON)
525 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
526 output_col);
527 }
528
529 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)530 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
531 JCOEFPTR coef_block, JSAMPARRAY output_buf,
532 JDIMENSION output_col)
533 {
534 if (simd_support & JSIMD_ARM_NEON)
535 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
536 output_col);
537 }
538
539 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)540 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
541 JCOEFPTR coef_block, JSAMPARRAY output_buf,
542 JDIMENSION output_col)
543 {
544 }
545