1 /* compare256_power9.c - Power9 version of compare256
2 * Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6 #ifdef POWER9
7 #include <altivec.h>
8 #include "../../zbuild.h"
9 #include "../../zendian.h"
10
/* GCC releases prior to 12 implemented the semantics of these bit counting
 * builtins incorrectly, so on those compilers the instructions are emitted
 * directly through inline assembly; everywhere else the builtins are used.
 * https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3f30f2d1dbb3228b8468b26239fe60c2974ce2ac
 *
 * Both macros take a vector operand and an lvalue that receives the count:
 *   zng_vec_vctzlsbb - vctzlsbb (count trailing zero least-significant bits, by byte)
 *   zng_vec_vclzlsbb - vclzlsbb (count leading zero least-significant bits, by byte) */
#if !defined(__GNUC__) || (__GNUC__ >= 12)
#  define zng_vec_vctzlsbb(mask, count) count = __builtin_vec_vctzlsbb(mask)
#  define zng_vec_vclzlsbb(mask, count) count = __builtin_vec_vclzlsbb(mask)
#else
#  define zng_vec_vctzlsbb(mask, count) __asm__ volatile("vctzlsbb %0, %1\n\t" : "=r" (count) : "v" (mask))
#  define zng_vec_vclzlsbb(mask, count) __asm__ volatile("vclzlsbb %0, %1\n\t" : "=r" (count) : "v" (mask))
#endif
20
compare256_power9_static(const uint8_t * src0,const uint8_t * src1)21 static inline uint32_t compare256_power9_static(const uint8_t *src0, const uint8_t *src1) {
22 uint32_t len = 0, cmplen;
23
24 do {
25 vector unsigned char vsrc0, vsrc1, vc;
26
27 vsrc0 = *((vector unsigned char *)src0);
28 vsrc1 = *((vector unsigned char *)src1);
29
30 /* Compare 16 bytes at a time. Each byte of vc will be either
31 * all ones or all zeroes, depending on the result of the comparison. */
32 vc = (vector unsigned char)vec_cmpne(vsrc0, vsrc1);
33
34 /* Since the index of matching bytes will contain only zeroes
35 * on vc (since we used cmpne), counting the number of consecutive
36 * bytes where LSB == 0 is the same as counting the length of the match. */
37 #if BYTE_ORDER == LITTLE_ENDIAN
38 zng_vec_vctzlsbb(vc, cmplen);
39 #else
40 zng_vec_vclzlsbb(vc, cmplen);
41 #endif
42 if (cmplen != 16)
43 return len + cmplen;
44
45 src0 += 16, src1 += 16, len += 16;
46 } while (len < 256);
47
48 return 256;
49 }
50
compare256_power9(const uint8_t * src0,const uint8_t * src1)51 Z_INTERNAL uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1) {
52 return compare256_power9_static(src0, src1);
53 }
54
55 #define LONGEST_MATCH longest_match_power9
56 #define COMPARE256 compare256_power9_static
57
58 #include "match_tpl.h"
59
60 #define LONGEST_MATCH_SLOW
61 #define LONGEST_MATCH longest_match_slow_power9
62 #define COMPARE256 compare256_power9_static
63
64 #include "match_tpl.h"
65
66 #endif
67