• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* compare256_power9.c - Power9 version of compare256
 * Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
5 
6 #ifdef POWER9
7 #include <altivec.h>
8 #include "../../zbuild.h"
9 #include "../../zendian.h"
10 
/* Older versions of GCC implemented the semantics of these bit-counting builtins incorrectly.
 * https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3f30f2d1dbb3228b8468b26239fe60c2974ce2ac */
/* Select how the vctzlsbb / vclzlsbb instructions are reached. Both macros take a
 * vector `vc` and an lvalue `len` that receives the resulting count.
 * Compilers that do not define __GNUC__, or that report a major version of 12 or
 * later, go through the compiler builtins; all remaining ones emit the instruction
 * directly with inline asm. */
#if !defined(__GNUC__) || (__GNUC__ >= 12)
#  define zng_vec_vctzlsbb(vc, len) len = __builtin_vec_vctzlsbb(vc)
#  define zng_vec_vclzlsbb(vc, len) len = __builtin_vec_vclzlsbb(vc)
#else
#  define zng_vec_vctzlsbb(vc, len) __asm__ volatile("vctzlsbb %0, %1\n\t" : "=r" (len) : "v" (vc))
#  define zng_vec_vclzlsbb(vc, len) __asm__ volatile("vclzlsbb %0, %1\n\t" : "=r" (len) : "v" (vc))
#endif
20 
/* Count how many leading bytes of src0 and src1 are equal, looking at no more
 * than 256 bytes.
 *
 * src0, src1: buffers to compare. They are read 16 bytes at a time, and reading
 *             stops after the first 16-byte chunk that contains a difference, so
 *             both buffers must be readable up to the end of that chunk (or for
 *             256 bytes when they match throughout).
 *
 * Returns the length of the common prefix, a value in the range 0..256.
 *
 * The chunks are fetched with vec_xl(), which takes a const-qualified element
 * pointer and has no alignment requirement, rather than by dereferencing a
 * casted vector pointer (which would drop the const qualifier and assume a
 * 16-byte aligned address).
 */
static inline uint32_t compare256_power9_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;

    do {
        vector unsigned char vsrc0, vsrc1, vc;
        uint32_t cmplen;

        vsrc0 = vec_xl(0, src0);
        vsrc1 = vec_xl(0, src1);

        /* Compare 16 bytes at a time. Each byte of vc will be either
         * all ones or all zeroes, depending on the result of the comparison. */
        vc = (vector unsigned char)vec_cmpne(vsrc0, vsrc1);

        /* Matching positions hold only zeroes in vc (since we used cmpne), so
         * counting the consecutive bytes whose LSB == 0 gives the length of
         * the match within this chunk. The trailing or leading count is picked
         * according to the byte order of the target. */
#if BYTE_ORDER == LITTLE_ENDIAN
        zng_vec_vctzlsbb(vc, cmplen);
#else
        zng_vec_vclzlsbb(vc, cmplen);
#endif
        /* Anything short of a full 16-byte match means the first difference
         * lies inside this chunk. */
        if (cmplen != 16) {
            return len + cmplen;
        }

        src0 += 16;
        src1 += 16;
        len += 16;
    } while (len < 256);

    return 256;
}
50 
compare256_power9(const uint8_t * src0,const uint8_t * src1)51 Z_INTERNAL uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1) {
52     return compare256_power9_static(src0, src1);
53 }
54 
55 #define LONGEST_MATCH       longest_match_power9
56 #define COMPARE256          compare256_power9_static
57 
58 #include "match_tpl.h"
59 
60 #define LONGEST_MATCH_SLOW
61 #define LONGEST_MATCH       longest_match_slow_power9
62 #define COMPARE256          compare256_power9_static
63 
64 #include "match_tpl.h"
65 
66 #endif
67