• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use super::Adler32Imp;
2 
3 /// Resolves update implementation if CPU supports ssse3 instructions.
get_imp() -> Option<Adler32Imp>4 pub fn get_imp() -> Option<Adler32Imp> {
5   get_imp_inner()
6 }
7 
8 #[inline]
9 #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))]
get_imp_inner() -> Option<Adler32Imp>10 fn get_imp_inner() -> Option<Adler32Imp> {
11   if std::is_x86_feature_detected!("ssse3") {
12     Some(imp::update)
13   } else {
14     None
15   }
16 }
17 
18 #[inline]
19 #[cfg(all(
20   target_feature = "ssse3",
21   not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
22 ))]
get_imp_inner() -> Option<Adler32Imp>23 fn get_imp_inner() -> Option<Adler32Imp> {
24   Some(imp::update)
25 }
26 
27 #[inline]
28 #[cfg(all(
29   not(target_feature = "ssse3"),
30   not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))
31 ))]
get_imp_inner() -> Option<Adler32Imp>32 fn get_imp_inner() -> Option<Adler32Imp> {
33   None
34 }
35 
36 #[cfg(all(
37   any(target_arch = "x86", target_arch = "x86_64"),
38   any(feature = "std", target_feature = "ssse3")
39 ))]
40 mod imp {
41   const MOD: u32 = 65521;
42   const NMAX: usize = 5552;
43   const BLOCK_SIZE: usize = 32;
44   const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;
45 
46   #[cfg(target_arch = "x86")]
47   use core::arch::x86::*;
48   #[cfg(target_arch = "x86_64")]
49   use core::arch::x86_64::*;
50 
update(a: u16, b: u16, data: &[u8]) -> (u16, u16)51   pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
52     unsafe { update_imp(a, b, data) }
53   }
54 
55   #[inline]
56   #[target_feature(enable = "ssse3")]
update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16)57   unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
58     let mut a = a as u32;
59     let mut b = b as u32;
60 
61     let chunks = data.chunks_exact(CHUNK_SIZE);
62     let remainder = chunks.remainder();
63     for chunk in chunks {
64       update_chunk_block(&mut a, &mut b, chunk);
65     }
66 
67     update_block(&mut a, &mut b, remainder);
68 
69     (a as u16, b as u16)
70   }
71 
update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8])72   unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
73     debug_assert_eq!(
74       chunk.len(),
75       CHUNK_SIZE,
76       "Unexpected chunk size (expected {}, got {})",
77       CHUNK_SIZE,
78       chunk.len()
79     );
80 
81     reduce_add_blocks(a, b, chunk);
82 
83     *a %= MOD;
84     *b %= MOD;
85   }
86 
update_block(a: &mut u32, b: &mut u32, chunk: &[u8])87   unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
88     debug_assert!(
89       chunk.len() <= CHUNK_SIZE,
90       "Unexpected chunk size (expected <= {}, got {})",
91       CHUNK_SIZE,
92       chunk.len()
93     );
94 
95     for byte in reduce_add_blocks(a, b, chunk) {
96       *a += *byte as u32;
97       *b += *a;
98     }
99 
100     *a %= MOD;
101     *b %= MOD;
102   }
103 
104   #[inline(always)]
reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8]105   unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
106     if chunk.len() < BLOCK_SIZE {
107       return chunk;
108     }
109 
110     let blocks = chunk.chunks_exact(BLOCK_SIZE);
111     let blocks_remainder = blocks.remainder();
112 
113     let one_v = _mm_set1_epi16(1);
114     let zero_v = _mm_set1_epi16(0);
115     let weight_hi_v = get_weight_hi();
116     let weight_lo_v = get_weight_lo();
117 
118     let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _);
119     let mut a_v = _mm_set_epi32(0, 0, 0, 0);
120     let mut b_v = _mm_set_epi32(0, 0, 0, *b as _);
121 
122     for block in blocks {
123       let block_ptr = block.as_ptr() as *const _;
124       let left_v = _mm_loadu_si128(block_ptr);
125       let right_v = _mm_loadu_si128(block_ptr.add(1));
126 
127       p_v = _mm_add_epi32(p_v, a_v);
128 
129       a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v));
130       let mad = _mm_maddubs_epi16(left_v, weight_hi_v);
131       b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v));
132 
133       a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v));
134       let mad = _mm_maddubs_epi16(right_v, weight_lo_v);
135       b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v));
136     }
137 
138     b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5));
139 
140     *a += reduce_add(a_v);
141     *b = reduce_add(b_v);
142 
143     blocks_remainder
144   }
145 
146   #[inline(always)]
reduce_add(v: __m128i) -> u32147   unsafe fn reduce_add(v: __m128i) -> u32 {
148     let hi = _mm_unpackhi_epi64(v, v);
149     let sum = _mm_add_epi32(hi, v);
150     let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));
151     let sum = _mm_add_epi32(sum, hi);
152 
153     _mm_cvtsi128_si32(sum) as _
154   }
155 
156   #[inline(always)]
get_weight_lo() -> __m128i157   unsafe fn get_weight_lo() -> __m128i {
158     _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
159   }
160 
161   #[inline(always)]
get_weight_hi() -> __m128i162   unsafe fn get_weight_hi() -> __m128i {
163     _mm_set_epi8(
164       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
165     )
166   }
167 }
168 
169 #[cfg(test)]
170 mod tests {
171   use rand::Rng;
172 
173   #[test]
zeroes()174   fn zeroes() {
175     assert_sum_eq(&[]);
176     assert_sum_eq(&[0]);
177     assert_sum_eq(&[0, 0]);
178     assert_sum_eq(&[0; 100]);
179     assert_sum_eq(&[0; 1024]);
180     assert_sum_eq(&[0; 1024 * 1024]);
181   }
182 
183   #[test]
ones()184   fn ones() {
185     assert_sum_eq(&[]);
186     assert_sum_eq(&[1]);
187     assert_sum_eq(&[1, 1]);
188     assert_sum_eq(&[1; 100]);
189     assert_sum_eq(&[1; 1024]);
190     assert_sum_eq(&[1; 1024 * 1024]);
191   }
192 
193   #[test]
random()194   fn random() {
195     let mut random = [0; 1024 * 1024];
196     rand::thread_rng().fill(&mut random[..]);
197 
198     assert_sum_eq(&random[..1]);
199     assert_sum_eq(&random[..100]);
200     assert_sum_eq(&random[..1024]);
201     assert_sum_eq(&random[..1024 * 1024]);
202   }
203 
204   /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
205   #[test]
wiki()206   fn wiki() {
207     assert_sum_eq(b"Wikipedia");
208   }
209 
assert_sum_eq(data: &[u8])210   fn assert_sum_eq(data: &[u8]) {
211     if let Some(update) = super::get_imp() {
212       let (a, b) = update(1, 0, data);
213       let left = u32::from(b) << 16 | u32::from(a);
214       let right = adler::adler32_slice(data);
215 
216       assert_eq!(left, right, "len({})", data.len());
217     }
218   }
219 }
220