1 /* cpu_features.h -- CPU architecture feature check 2 * Copyright (C) 2017 Hans Kristian Rosbach 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 */ 5 6 #ifndef CPU_FEATURES_H_ 7 #define CPU_FEATURES_H_ 8 9 #include "adler32_fold.h" 10 #include "crc32_fold.h" 11 12 #if defined(X86_FEATURES) 13 # include "arch/x86/x86_features.h" 14 # include "fallback_builtins.h" 15 #elif defined(ARM_FEATURES) 16 # include "arch/arm/arm_features.h" 17 #elif defined(PPC_FEATURES) || defined(POWER_FEATURES) 18 # include "arch/power/power_features.h" 19 #elif defined(S390_FEATURES) 20 # include "arch/s390/s390_features.h" 21 #endif 22 23 extern void cpu_check_features(void); 24 25 /* adler32 */ 26 typedef uint32_t (*adler32_func)(uint32_t adler, const unsigned char *buf, size_t len); 27 28 extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len); 29 #ifdef ARM_NEON_ADLER32 30 extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len); 31 #endif 32 #ifdef PPC_VMX_ADLER32 33 extern uint32_t adler32_vmx(uint32_t adler, const unsigned char *buf, size_t len); 34 #endif 35 #ifdef X86_SSSE3_ADLER32 36 extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len); 37 #endif 38 #ifdef X86_AVX2_ADLER32 39 extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len); 40 #endif 41 #ifdef X86_AVX512_ADLER32 42 extern uint32_t adler32_avx512(uint32_t adler, const unsigned char *buf, size_t len); 43 #endif 44 #ifdef X86_AVX512VNNI_ADLER32 45 extern uint32_t adler32_avx512_vnni(uint32_t adler, const unsigned char *buf, size_t len); 46 #endif 47 #ifdef POWER8_VSX_ADLER32 48 extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len); 49 #endif 50 51 /* adler32 folding */ 52 #ifdef X86_SSE42_ADLER32 53 extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); 54 #endif 55 #ifdef X86_AVX2_ADLER32 56 extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); 57 #endif 58 #ifdef X86_AVX512_ADLER32 59 extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); 60 #endif 61 #ifdef X86_AVX512VNNI_ADLER32 62 extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); 63 #endif 64 65 /* CRC32 folding */ 66 #ifdef X86_PCLMULQDQ_CRC 67 extern uint32_t crc32_fold_reset_pclmulqdq(crc32_fold *crc); 68 extern void crc32_fold_copy_pclmulqdq(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); 69 extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); 70 extern uint32_t crc32_fold_final_pclmulqdq(crc32_fold *crc); 71 extern uint32_t crc32_pclmulqdq(uint32_t crc32, const unsigned char* buf, uint64_t len); 72 #endif 73 74 /* memory chunking */ 75 extern uint32_t chunksize_c(void); 76 extern uint8_t* chunkcopy_c(uint8_t *out, uint8_t const *from, unsigned len); 77 extern uint8_t* chunkcopy_safe_c(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); 78 extern uint8_t* chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len); 79 extern uint8_t* chunkmemset_c(uint8_t *out, unsigned dist, unsigned len); 80 extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); 81 #ifdef X86_SSE2_CHUNKSET 82 extern uint32_t chunksize_sse2(void); 83 extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len); 84 extern uint8_t* chunkcopy_safe_sse2(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); 85 extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len); 86 extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len); 87 extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); 88 #endif 89 #ifdef X86_SSE41 90 extern uint8_t* chunkmemset_sse41(uint8_t *out, unsigned dist, unsigned len); 91 extern uint8_t* chunkmemset_safe_sse41(uint8_t *out, unsigned dist, unsigned len, unsigned left); 92 #endif 93 #ifdef X86_AVX_CHUNKSET 94 extern uint32_t chunksize_avx(void); 95 extern uint8_t* chunkcopy_avx(uint8_t *out, uint8_t const *from, unsigned len); 96 extern uint8_t* chunkcopy_safe_avx(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); 97 extern uint8_t* chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len); 98 extern uint8_t* chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len); 99 extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left); 100 #endif 101 #ifdef ARM_NEON_CHUNKSET 102 extern uint32_t chunksize_neon(void); 103 extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len); 104 extern uint8_t* chunkcopy_safe_neon(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); 105 extern uint8_t* chunkunroll_neon(uint8_t *out, unsigned *dist, unsigned *len); 106 extern uint8_t* chunkmemset_neon(uint8_t *out, unsigned dist, unsigned len); 107 extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left); 108 #endif 109 #ifdef POWER8_VSX_CHUNKSET 110 extern uint32_t chunksize_power8(void); 111 extern uint8_t* chunkcopy_power8(uint8_t *out, uint8_t const *from, unsigned len); 112 extern uint8_t* chunkcopy_safe_power8(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe); 113 extern uint8_t* chunkunroll_power8(uint8_t *out, unsigned *dist, unsigned *len); 114 extern uint8_t* chunkmemset_power8(uint8_t *out, unsigned dist, unsigned len); 115 extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left); 116 #endif 117 118 /* CRC32 */ 119 typedef uint32_t (*crc32_func)(uint32_t crc32, const unsigned char * buf, uint64_t len); 120 121 extern uint32_t crc32_braid(uint32_t crc, const unsigned char *buf, uint64_t len); 122 #ifdef ARM_ACLE_CRC_HASH 123 extern uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len); 124 #elif defined(POWER8_VSX_CRC32) 125 extern uint32_t crc32_power8(uint32_t crc, const unsigned char *buf, uint64_t len); 126 #elif defined(S390_CRC32_VX) 127 extern uint32_t PREFIX(s390_crc32_vx)(uint32_t crc, const unsigned char *buf, uint64_t len); 128 #endif 129 130 /* compare256 */ 131 typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1); 132 133 extern uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1); 134 #ifdef UNALIGNED_OK 135 extern uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1); 136 #ifdef HAVE_BUILTIN_CTZ 137 extern uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1); 138 #endif 139 #if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) 140 extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1); 141 #endif 142 #endif 143 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) 144 extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1); 145 #endif 146 #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) 147 extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1); 148 #endif 149 #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) 150 extern uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1); 151 #endif 152 #ifdef POWER9 153 extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1); 154 #endif 155 156 #ifdef DEFLATE_H_ 157 /* insert_string */ 158 extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count); 159 #ifdef X86_SSE42_CRC_HASH 160 extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count); 161 #elif defined(ARM_ACLE_CRC_HASH) 162 extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count); 163 #endif 164 165 /* longest_match */ 166 extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match); 167 #ifdef UNALIGNED_OK 168 extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match); 169 #ifdef HAVE_BUILTIN_CTZ 170 extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match); 171 #endif 172 #if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) 173 extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match); 174 #endif 175 #endif 176 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) 177 extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match); 178 #endif 179 #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) 180 extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match); 181 #endif 182 #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) 183 extern uint32_t longest_match_neon(deflate_state *const s, Pos cur_match); 184 #endif 185 #ifdef POWER9 186 extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match); 187 #endif 188 189 /* longest_match_slow */ 190 extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match); 191 #ifdef UNALIGNED_OK 192 extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match); 193 extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match); 194 #ifdef UNALIGNED64_OK 195 extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match); 196 #endif 197 #endif 198 #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) 199 extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match); 200 #endif 201 #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) 202 extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match); 203 #endif 204 #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) 205 extern uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match); 206 #endif 207 #ifdef POWER9 208 extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match); 209 #endif 210 211 /* quick_insert_string */ 212 extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str); 213 #ifdef X86_SSE42_CRC_HASH 214 extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str); 215 #elif defined(ARM_ACLE_CRC_HASH) 216 extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str); 217 #endif 218 219 /* slide_hash */ 220 typedef void (*slide_hash_func)(deflate_state *s); 221 222 #ifdef X86_SSE2 223 extern void slide_hash_sse2(deflate_state *s); 224 #elif defined(ARM_NEON_SLIDEHASH) 225 extern void slide_hash_neon(deflate_state *s); 226 #endif 227 #if defined(PPC_VMX_SLIDEHASH) 228 extern void slide_hash_vmx(deflate_state *s); 229 #endif 230 #if defined(POWER8_VSX_SLIDEHASH) 231 extern void slide_hash_power8(deflate_state *s); 232 #endif 233 #ifdef X86_AVX2 234 extern void slide_hash_avx2(deflate_state *s); 235 #endif 236 237 /* update_hash */ 238 extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val); 239 #ifdef X86_SSE42_CRC_HASH 240 extern uint32_t update_hash_sse4(deflate_state *const s, uint32_t h, uint32_t val); 241 #elif defined(ARM_ACLE_CRC_HASH) 242 extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val); 243 #endif 244 #endif 245 246 #endif 247