/* Optimized slide_hash for POWER processors
 * Copyright (C) 2019-2020 IBM Corporation
 * Author: Matheus Castanho <msc@linux.ibm.com>
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#ifdef POWER8_VSX_SLIDEHASH

#include <altivec.h>
#include "zbuild.h"
#include "deflate.h"

slide_hash_power8_loop(deflate_state * s,unsigned n_elems,Pos * table_end)13 static inline void slide_hash_power8_loop(deflate_state *s, unsigned n_elems, Pos *table_end) {
14 vector unsigned short vw, vm, *vp;
15 unsigned chunks;
16
17 /* Each vector register (chunk) corresponds to 128 bits == 8 Posf,
18 * so instead of processing each of the n_elems in the hash table
19 * individually, we can do it in chunks of 8 with vector instructions.
20 *
21 * This function is only called from slide_hash_power8(), and both calls
22 * pass n_elems as a power of 2 higher than 2^7, as defined by
23 * deflateInit2_(), so n_elems will always be a multiple of 8. */
24 chunks = n_elems >> 3;
25 Assert(n_elems % 8 == 0, "Weird hash table size!");
26
27 /* This type casting is safe since s->w_size is always <= 64KB
28 * as defined by deflateInit2_() and Posf == unsigned short */
29 vw[0] = (Pos) s->w_size;
30 vw = vec_splat(vw,0);
31
32 vp = (vector unsigned short *) table_end;
33
34 do {
35 /* Processing 8 elements at a time */
36 vp--;
37 vm = *vp;
38
39 /* This is equivalent to: m >= w_size ? m - w_size : 0
40 * Since we are using a saturated unsigned subtraction, any
41 * values that are > w_size will be set to 0, while the others
42 * will be subtracted by w_size. */
43 *vp = vec_subs(vm,vw);
44 } while (--chunks);
45 }
slide_hash_power8(deflate_state * s)47 void Z_INTERNAL slide_hash_power8(deflate_state *s) {
48 unsigned int n;
49 Pos *p;
50
51 n = HASH_SIZE;
52 p = &s->head[n];
53 slide_hash_power8_loop(s,n,p);
54
55 n = s->w_size;
56 p = &s->prev[n];
57 slide_hash_power8_loop(s,n,p);
58 }

#endif /* POWER8_VSX_SLIDEHASH */