• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* cpu_features.h -- CPU architecture feature check
2  * Copyright (C) 2017 Hans Kristian Rosbach
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 #ifndef CPU_FEATURES_H_
7 #define CPU_FEATURES_H_
8 
9 #include "adler32_fold.h"
10 #include "crc32_fold.h"
11 
12 #if defined(X86_FEATURES)
13 #  include "arch/x86/x86_features.h"
14 #  include "fallback_builtins.h"
15 #elif defined(ARM_FEATURES)
16 #  include "arch/arm/arm_features.h"
17 #elif defined(PPC_FEATURES) || defined(POWER_FEATURES)
18 #  include "arch/power/power_features.h"
19 #elif defined(S390_FEATURES)
20 #  include "arch/s390/s390_features.h"
21 #endif
22 
/* Declaration only; the definition lives in another translation unit.
 * Takes no arguments and returns nothing. Going by its name and this header's
 * title it runs the CPU feature check - confirm against the implementation. */
extern void cpu_check_features(void);
24 
/* adler32 */

/* Function-pointer type matching the signature shared by every adler32
 * variant declared in this section. */
typedef uint32_t (*adler32_func)(uint32_t adler, const unsigned char *buf, size_t len);

/* Declared unconditionally, independent of any feature macro. */
extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);

/* Every variant below is declared only when its own feature macro is defined. */
#if defined(ARM_NEON_ADLER32)
extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#if defined(PPC_VMX_ADLER32)
extern uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#if defined(X86_SSSE3_ADLER32)
extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#if defined(X86_AVX2_ADLER32)
extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#if defined(X86_AVX512_ADLER32)
extern uint32_t adler32_avx512(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#if defined(X86_AVX512VNNI_ADLER32)
extern uint32_t adler32_avx512_vnni(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#if defined(POWER8_VSX_ADLER32)
extern uint32_t adler32_power8(uint32_t adler, const unsigned char *buf, size_t len);
#endif
50 
/* adler32 folding
 *
 * x86-only "fold copy" variants. All four share one signature (running adler
 * value, destination buffer, source buffer, length) and each is declared only
 * when its feature macro is defined.
 * NOTE(review): presumably these checksum src while copying it to dst -
 * confirm against the implementations. */
#if defined(X86_SSE42_ADLER32)
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#if defined(X86_AVX2_ADLER32)
extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#if defined(X86_AVX512_ADLER32)
extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#if defined(X86_AVX512VNNI_ADLER32)
extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
64 
/* CRC32 folding
 *
 * PCLMULQDQ-based entry points; the whole group is declared only when
 * X86_PCLMULQDQ_CRC is defined. The reset/copy/fold/final functions operate on
 * a caller-supplied crc32_fold object, while crc32_pclmulqdq has the plain
 * (crc, buffer, length) signature. */
#if defined(X86_PCLMULQDQ_CRC)
extern uint32_t crc32_fold_reset_pclmulqdq(crc32_fold *crc);
extern void crc32_fold_copy_pclmulqdq(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
extern uint32_t crc32_fold_final_pclmulqdq(crc32_fold *crc);
extern uint32_t crc32_pclmulqdq(uint32_t crc32, const unsigned char *buf, uint64_t len);
#endif
73 
/* memory chunking
 *
 * The generic (_c) set is always declared. Each architecture-specific set is
 * declared only when its feature macro is defined and repeats the same six
 * entry points, except the SSE4.1 group, which declares only the two
 * chunkmemset functions. */

/* Generic C variants. */
extern uint32_t chunksize_c(void);
extern uint8_t *chunkcopy_c(uint8_t *out, const uint8_t *from, unsigned len);
extern uint8_t *chunkcopy_safe_c(uint8_t *out, const uint8_t *from, unsigned len, uint8_t *safe);
extern uint8_t *chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t *chunkmemset_c(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t *chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);

/* x86 SSE2 variants. */
#if defined(X86_SSE2_CHUNKSET)
extern uint32_t chunksize_sse2(void);
extern uint8_t *chunkcopy_sse2(uint8_t *out, const uint8_t *from, unsigned len);
extern uint8_t *chunkcopy_safe_sse2(uint8_t *out, const uint8_t *from, unsigned len, uint8_t *safe);
extern uint8_t *chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t *chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t *chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif

/* x86 SSE4.1 variants (chunkmemset only). */
#if defined(X86_SSE41)
extern uint8_t *chunkmemset_sse41(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t *chunkmemset_safe_sse41(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif

/* x86 AVX variants. */
#if defined(X86_AVX_CHUNKSET)
extern uint32_t chunksize_avx(void);
extern uint8_t *chunkcopy_avx(uint8_t *out, const uint8_t *from, unsigned len);
extern uint8_t *chunkcopy_safe_avx(uint8_t *out, const uint8_t *from, unsigned len, uint8_t *safe);
extern uint8_t *chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t *chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t *chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif

/* ARM NEON variants. */
#if defined(ARM_NEON_CHUNKSET)
extern uint32_t chunksize_neon(void);
extern uint8_t *chunkcopy_neon(uint8_t *out, const uint8_t *from, unsigned len);
extern uint8_t *chunkcopy_safe_neon(uint8_t *out, const uint8_t *from, unsigned len, uint8_t *safe);
extern uint8_t *chunkunroll_neon(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t *chunkmemset_neon(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t *chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif

/* POWER8 VSX variants. */
#if defined(POWER8_VSX_CHUNKSET)
extern uint32_t chunksize_power8(void);
extern uint8_t *chunkcopy_power8(uint8_t *out, const uint8_t *from, unsigned len);
extern uint8_t *chunkcopy_safe_power8(uint8_t *out, const uint8_t *from, unsigned len, uint8_t *safe);
extern uint8_t *chunkunroll_power8(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t *chunkmemset_power8(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t *chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
117 
/* CRC32 */

/* Function-pointer type matching the signature of the crc32 variants
 * declared in this section. */
typedef uint32_t (*crc32_func)(uint32_t crc32, const unsigned char *buf, uint64_t len);

/* Declared unconditionally. */
extern uint32_t crc32_braid(uint32_t crc, const unsigned char *buf, uint64_t len);

/* The accelerated variants form a single #if/#elif chain, so at most one of
 * them is declared in any given build. */
#if defined(ARM_ACLE_CRC_HASH)
extern uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len);
#elif defined(POWER8_VSX_CRC32)
extern uint32_t crc32_power8(uint32_t crc, const unsigned char *buf, uint64_t len);
#elif defined(S390_CRC32_VX)
extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, uint64_t len);
#endif
129 
/* compare256 */

/* Function-pointer type matching the signature shared by every compare256
 * variant declared in this section. */
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);

/* Declared unconditionally. */
extern uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);

/* Unaligned-access variants: all of them require UNALIGNED_OK. The _32 variant
 * additionally needs HAVE_BUILTIN_CTZ; the _64 variant needs both
 * UNALIGNED64_OK and HAVE_BUILTIN_CTZLL. */
#if defined(UNALIGNED_OK)
extern uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
#  if defined(HAVE_BUILTIN_CTZ)
extern uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
#  endif
#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
#  endif
#endif

/* Architecture-specific variants, each behind its own feature test. */
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
#endif
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
extern uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
#endif
#if defined(POWER9)
extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
#endif
155 
156 #ifdef DEFLATE_H_
157 /* insert_string */
158 extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
159 #ifdef X86_SSE42_CRC_HASH
160 extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
161 #elif defined(ARM_ACLE_CRC_HASH)
162 extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
163 #endif
164 
165 /* longest_match */
166 extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
167 #ifdef UNALIGNED_OK
168 extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
169 #ifdef HAVE_BUILTIN_CTZ
170 extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
171 #endif
172 #if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
173 extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
174 #endif
175 #endif
176 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
177 extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
178 #endif
179 #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
180 extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
181 #endif
182 #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
183 extern uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
184 #endif
185 #ifdef POWER9
186 extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
187 #endif
188 
189 /* longest_match_slow */
190 extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
191 #ifdef UNALIGNED_OK
192 extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
193 extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
194 #ifdef UNALIGNED64_OK
195 extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
196 #endif
197 #endif
198 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
199 extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
200 #endif
201 #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
202 extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
203 #endif
204 #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
205 extern uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
206 #endif
207 #ifdef POWER9
208 extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
209 #endif
210 
211 /* quick_insert_string */
212 extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
213 #ifdef X86_SSE42_CRC_HASH
214 extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
215 #elif defined(ARM_ACLE_CRC_HASH)
216 extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
217 #endif
218 
219 /* slide_hash */
220 typedef void (*slide_hash_func)(deflate_state *s);
221 
222 #ifdef X86_SSE2
223 extern void slide_hash_sse2(deflate_state *s);
224 #elif defined(ARM_NEON_SLIDEHASH)
225 extern void slide_hash_neon(deflate_state *s);
226 #endif
227 #if defined(PPC_VMX_SLIDEHASH)
228 extern void slide_hash_vmx(deflate_state *s);
229 #endif
230 #if defined(POWER8_VSX_SLIDEHASH)
231 extern void slide_hash_power8(deflate_state *s);
232 #endif
233 #ifdef X86_AVX2
234 extern void slide_hash_avx2(deflate_state *s);
235 #endif
236 
237 /* update_hash */
238 extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
239 #ifdef X86_SSE42_CRC_HASH
240 extern uint32_t update_hash_sse4(deflate_state *const s, uint32_t h, uint32_t val);
241 #elif defined(ARM_ACLE_CRC_HASH)
242 extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
243 #endif
244 #endif
245 
246 #endif
247