/* slide_hash_simd.h
 *
 * Copyright 2022 The Chromium Authors
 * Use of this source code is governed by a BSD-style license that can be
 * found in the Chromium source repository LICENSE file.
 */
7 
8 #ifndef SLIDE_HASH_SIMD_H
9 #define SLIDE_HASH_SIMD_H
10 
11 #include "deflate.h"
12 
/*
 * Choose the spelling of the inline keyword, unless the including file has
 * already defined INLINE: MSVC proper (not clang in MSVC mode) gets
 * __inline, every other compiler gets inline.
 */
#if !defined(INLINE)
#  if !defined(_MSC_VER) || defined(__clang__)
#    define INLINE inline
#  else
#    define INLINE __inline
#  endif
#endif
20 
21 #if defined(CPU_NO_SIMD)
22 
23 #error SIMD has been disabled for your build target
24 
25 #elif defined(DEFLATE_SLIDE_HASH_SSE2)
26 
27 #include <emmintrin.h>  /* SSE2 */
28 
29 #define Z_SLIDE_INIT_SIMD(wsize) _mm_set1_epi16((ush)(wsize))
30 
31 #define Z_SLIDE_HASH_SIMD(table, size, vector_wsize) \
32     for (const Posf* const end = table + size; table != end;) { \
33         __m128i vO = _mm_loadu_si128((__m128i *)(table + 0)); \
34         vO = _mm_subs_epu16(vO, vector_wsize); \
35         _mm_storeu_si128((__m128i *)(table + 0), vO); \
36         table += 8; \
37     }
38 
39 typedef __m128i z_vec128i_u16x8_t;
40 
41 #elif defined(DEFLATE_SLIDE_HASH_NEON)
42 
43 #include <arm_neon.h>  /* NEON */
44 
45 #define Z_SLIDE_INIT_SIMD(wsize) vdupq_n_u16((ush)(wsize))
46 
47 #define Z_SLIDE_HASH_SIMD(table, size, vector_wsize) \
48     for (const Posf* const end = table + size; table != end;) { \
49         uint16x8_t vO = vld1q_u16(table + 0); \
50         uint16x8_t v8 = vld1q_u16(table + 8); \
51         vO = vqsubq_u16(vO, vector_wsize); \
52         v8 = vqsubq_u16(v8, vector_wsize); \
53         vst1q_u16(table + 0, vO); \
54         vst1q_u16(table + 8, v8); \
55         table += 8 + 8; \
56     }
57 
58 typedef uint16x8_t z_vec128i_u16x8_t;
59 
60 #else
61 
62 #error slide_hash_simd is not defined for your build target
63 
64 #endif
65 
66 /* ===========================================================================
67  * Slide the hash table when sliding the window down (could be avoided with 32
68  * bit values at the expense of memory usage). We slide even when level == 0 to
69  * keep the hash table consistent if we switch back to level > 0 later.
70  */
slide_hash_simd(Posf * head,Posf * prev,const uInt w_size,const uInt hash_size)71 local INLINE void slide_hash_simd(
72     Posf *head, Posf *prev, const uInt w_size, const uInt hash_size) {
73     /*
74      * The SIMD implementation of the hash table slider assumes:
75      *
76      * 1. hash chain offset is 2 bytes. Should be true as Pos is "ush" type.
77      */
78     Assert(sizeof(Pos) == 2, "Pos type size error: should be 2 bytes");
79     Assert(sizeof(ush) == 2, "ush type size error: should be 2 bytes");
80 
81     Assert(hash_size <= (1 << 16), "Hash table maximum size error");
82     Assert(hash_size >= (1 << 8), "Hash table minimum size error");
83     Assert(w_size == (ush)w_size, "Prev table size error");
84 
85     /*
86      * 2. The hash & prev table sizes are a multiple of 32 bytes (256 bits),
87      * since the NEON table slider moves two 128-bit items per loop (loop is
88      * unrolled on NEON for performance, see http://crbug.com/863257).
89      */
90     Assert(!((hash_size * sizeof(head[0])) & (32 - 1)),
91         "Hash table size error: should be a multiple of 32 bytes");
92     Assert(!((w_size * sizeof(prev[0])) & (32 - 1)),
93         "Prev table size error: should be a multiple of 32 bytes");
94 
95     /*
96      * Duplicate (ush)w_size in each uint16_t component of a 128-bit vector.
97      */
98     const z_vec128i_u16x8_t vec_wsize = Z_SLIDE_INIT_SIMD(w_size);
99 
100     /*
101      * Slide {head,prev} hash chain values: subtracts (ush)w_size from every
102      * value with a saturating SIMD subtract, to clamp the result to 0(NIL),
103      * to implement slide_hash() `(m >= wsize ? m - wsize : NIL);` code.
104      */
105     Z_SLIDE_HASH_SIMD(head, hash_size, vec_wsize);
106 #ifndef FASTEST
107     Z_SLIDE_HASH_SIMD(prev, w_size, vec_wsize);
108 #endif
109 
110 }
111 
/*
 * Drop the helper macros so they do not leak into files that include this
 * header. z_vec128i_u16x8_t is a typedef rather than a macro, so #undef has
 * no effect on it; the type name stays visible to includers.
 */
#undef Z_SLIDE_HASH_SIMD
#undef Z_SLIDE_INIT_SIMD
115 
116 #endif  /* SLIDE_HASH_SIMD_H */
117