• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* chunkset_neon.c -- NEON inline functions to copy small data chunks.
2  * For conditions of distribution and use, see copyright notice in zlib.h
3  */
4 
5 #ifdef ARM_NEON_CHUNKSET
6 #ifdef _M_ARM64
7 #  include <arm64_neon.h>
8 #else
9 #  include <arm_neon.h>
10 #endif
11 #include "../../zbuild.h"
12 #include "../../zutil.h"
13 
/* Chunk type used by every helper in this file: the NEON uint8x16_t
 * vector (sixteen unsigned 8-bit lanes). */
14 typedef uint8x16_t chunk_t;
15 
/* Flags announcing that this file defines chunkmemset_1, chunkmemset_2,
 * chunkmemset_4 and chunkmemset_8 below.
 * NOTE(review): presumably tested by chunkset_tpl.h, which is included at the
 * end of this file -- confirm against the template. */
16 #define HAVE_CHUNKMEMSET_1
17 #define HAVE_CHUNKMEMSET_2
18 #define HAVE_CHUNKMEMSET_4
19 #define HAVE_CHUNKMEMSET_8
20 
/* Fill every byte lane of *chunk with the single byte stored at `from`. */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
    const uint8_t byte = *from;
    *chunk = vdupq_n_u8(byte);
}
24 
/* Replicate the 2-byte pattern stored at `from` across all of *chunk.
 * The pattern is read with memcpy, so `from` needs no particular alignment. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    uint16_t pattern;
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = vreinterpretq_u8_u16(vdupq_n_u16(pattern));
}
30 
/* Replicate the 4-byte pattern stored at `from` across all of *chunk.
 * The pattern is read with memcpy, so `from` needs no particular alignment. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    uint32_t pattern;
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = vreinterpretq_u8_u32(vdupq_n_u32(pattern));
}
36 
/* Replicate the 8 bytes stored at `from` into both halves of *chunk. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    const uint8x8_t half = vld1_u8(from);
    *chunk = vcombine_u8(half, half);
}
40 
/* Map the generic CHUNK* names onto identifiers carrying a _neon suffix.
 * They are defined ahead of the chunkset_tpl.h include at the end of this file.
 * NOTE(review): presumably the template uses them to name the functions it
 * generates -- confirm against chunkset_tpl.h. */
41 #define CHUNKSIZE        chunksize_neon
42 #define CHUNKCOPY        chunkcopy_neon
43 #define CHUNKCOPY_SAFE   chunkcopy_safe_neon
44 #define CHUNKUNROLL      chunkunroll_neon
45 #define CHUNKMEMSET      chunkmemset_neon
46 #define CHUNKMEMSET_SAFE chunkmemset_safe_neon
47 
/* Read one whole chunk (16 bytes) starting at `s` and store it in *chunk. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    const chunk_t loaded = vld1q_u8(s);
    *chunk = loaded;
}
51 
/* Write the whole chunk held in *chunk (16 bytes) to memory starting at `out`. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    const chunk_t value = *chunk;
    vst1q_u8(out, value);
}
55 
56 #include "chunkset_tpl.h"
57 
58 #endif
59