• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Optimized slide_hash for POWER processors
2  * Copyright (C) 2019-2020 IBM Corporation
3  * Author: Matheus Castanho <msc@linux.ibm.com>
4  * For conditions of distribution and use, see copyright notice in zlib.h
5  */
6 
7 #ifdef POWER8_VSX_SLIDEHASH
8 
9 #include <altivec.h>
10 #include "zbuild.h"
11 #include "deflate.h"
12 
/* Slide a table of hash positions down by the window size, 8 entries per
 * vector operation. Each entry e in the n_elems entries that end just before
 * table_end becomes (e >= w_size ? e - w_size : 0).
 *
 *   s          deflate state; only s->w_size is read here.
 *   n_elems    number of Pos entries to process. Must be a multiple of 8
 *              (checked by Assert in debug builds).
 *   table_end  one-past-the-end pointer of the table; entries are walked
 *              backwards from here.
 *
 * NOTE(review): the table is accessed through a vector pointer, which
 * presumably requires 16-byte alignment of the table -- confirm against
 * the allocation of s->head / s->prev.
 */
static inline void slide_hash_power8_loop(deflate_state *s, unsigned n_elems, Pos *table_end) {
    vector unsigned short *vp = (vector unsigned short *) table_end;
    vector unsigned short vw;
    unsigned chunks;

    /* Each vector register (chunk) holds 128 bits == 8 Pos entries, so the
     * table is processed in chunks of 8 instead of entry by entry.
     *
     * This function is only called from slide_hash_power8(), which passes
     * HASH_SIZE and s->w_size; both are expected to be powers of 2 of at
     * least 2^8 (as set up by deflateInit2_()), hence multiples of 8. */
    Assert(n_elems % 8 == 0, "Weird hash table size!");
    chunks = n_elems >> 3;

    /* Broadcast w_size to all 8 lanes. The narrowing cast relies on the
     * window size fitting in a Pos (unsigned short), as guaranteed by
     * deflateInit2_(). vec_splats() builds the vector directly from the
     * scalar, so no lane of an uninitialized vector is ever read. */
    vw = vec_splats((Pos) s->w_size);

    /* Pre-tested loop: a table shorter than one chunk is left untouched
     * instead of wrapping the counter and writing before the table. */
    for (; chunks != 0; chunks--) {
        /* Step back to the previous group of 8 entries. */
        vp--;

        /* Equivalent to: m >= w_size ? m - w_size : 0
         * The unsigned subtraction saturates, so entries smaller than
         * w_size become 0 while all others are reduced by w_size. */
        *vp = vec_subs(*vp, vw);
    }
}
46 
slide_hash_power8(deflate_state * s)47 void Z_INTERNAL slide_hash_power8(deflate_state *s) {
48     unsigned int n;
49     Pos *p;
50 
51     n = HASH_SIZE;
52     p = &s->head[n];
53     slide_hash_power8_loop(s,n,p);
54 
55     n = s->w_size;
56     p = &s->prev[n];
57     slide_hash_power8_loop(s,n,p);
58 }
59 
60 #endif /* POWER8_VSX_SLIDEHASH */
61