• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use super::Adler32Imp;
2 
3 /// Resolves update implementation if CPU supports simd128 instructions.
get_imp() -> Option<Adler32Imp>4 pub fn get_imp() -> Option<Adler32Imp> {
5   get_imp_inner()
6 }
7 
/// Variant compiled when the `simd128` target feature is enabled: hands back
/// the vectorised update routine defined in the `imp` module.
#[inline]
#[cfg(target_feature = "simd128")]
fn get_imp_inner() -> Option<Adler32Imp> {
  let update: Adler32Imp = imp::update;
  Some(update)
}
13 
14 #[inline]
15 #[cfg(not(target_feature = "simd128"))]
get_imp_inner() -> Option<Adler32Imp>16 fn get_imp_inner() -> Option<Adler32Imp> {
17   None
18 }
19 
/// WebAssembly `simd128` implementation of the Adler-32 update step.
///
/// Only compiled when the `simd128` target feature is enabled at build time.
#[cfg(target_feature = "simd128")]
mod imp {
  /// Adler-32 modulus.
  const MOD: u32 = 65521;
  /// Maximum number of bytes summed between two modulo reductions (this is
  /// the same `NMAX` bound zlib uses for its 32-bit accumulators).
  const NMAX: usize = 5552;
  /// Bytes consumed per vectorised iteration: two 16-byte `v128` loads.
  const BLOCK_SIZE: usize = 32;
  /// `NMAX` rounded down to a whole number of blocks.
  const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

  #[cfg(target_arch = "wasm32")]
  use core::arch::wasm32::*;
  #[cfg(target_arch = "wasm64")]
  use core::arch::wasm64::*;

  /// Feeds `data` into the running Adler-32 state `(a, b)` and returns the
  /// updated state.
  pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    update_imp(a, b, data)
  }

  /// Splits `data` into `CHUNK_SIZE` pieces, reducing modulo `MOD` after each
  /// one, then handles the shorter tail.
  #[inline]
  #[target_feature(enable = "simd128")]
  fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
    let mut a = a as u32;
    let mut b = b as u32;

    let chunks = data.chunks_exact(CHUNK_SIZE);
    let remainder = chunks.remainder();
    for chunk in chunks {
      update_chunk_block(&mut a, &mut b, chunk);
    }

    update_block(&mut a, &mut b, remainder);

    // Both sums were just reduced modulo `MOD`, so they fit in 16 bits.
    (a as u16, b as u16)
  }

  /// Processes one full chunk of exactly `CHUNK_SIZE` bytes and reduces both
  /// sums modulo `MOD`.
  fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
    debug_assert_eq!(
      chunk.len(),
      CHUNK_SIZE,
      "Unexpected chunk size (expected {}, got {})",
      CHUNK_SIZE,
      chunk.len()
    );

    // `CHUNK_SIZE` is a multiple of `BLOCK_SIZE`, so no bytes are left over.
    reduce_add_blocks(a, b, chunk);

    *a %= MOD;
    *b %= MOD;
  }

  /// Processes a trailing piece of at most `CHUNK_SIZE` bytes: whole blocks
  /// go through the vector path, leftover bytes are summed one by one, and
  /// both sums are then reduced modulo `MOD`.
  fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
    debug_assert!(
      chunk.len() <= CHUNK_SIZE,
      "Unexpected chunk size (expected <= {}, got {})",
      CHUNK_SIZE,
      chunk.len()
    );

    for byte in reduce_add_blocks(a, b, chunk) {
      *a += *byte as u32;
      *b += *a;
    }

    *a %= MOD;
    *b %= MOD;
  }

  /// Accumulates every whole `BLOCK_SIZE` block of `chunk` into `a` and `b`
  /// (without modulo reduction) and returns the bytes that did not fill a
  /// block.
  ///
  /// For a block `x[0..32]` the scalar recurrence is equivalent to
  /// `b += 32 * a + sum((32 - i) * x[i])` followed by `a += sum(x[i])`.
  /// The `32 * a` term is collected in `p_v` and applied once after the loop.
  #[inline(always)]
  fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
    if chunk.len() < BLOCK_SIZE {
      return chunk;
    }

    let blocks = chunk.chunks_exact(BLOCK_SIZE);
    let blocks_remainder = blocks.remainder();

    let weight_hi_v = get_weight_hi();
    let weight_lo_v = get_weight_lo();

    // The incoming `a` contributes `32 * a` to `b` once per block; seed the
    // prefix accumulator with that total (the factor 32 is applied below).
    let mut p_v = u32x4(*a * blocks.len() as u32, 0, 0, 0);
    let mut a_v = u32x4(0, 0, 0, 0);
    let mut b_v = u32x4(*b, 0, 0, 0);

    for block in blocks {
      let block_ptr = block.as_ptr() as *const v128;
      // SAFETY: `chunks_exact(BLOCK_SIZE)` yields slices of exactly 32 bytes,
      // so the 16 bytes at offset 0 are in bounds; `read_unaligned` imposes
      // no alignment requirement.
      let v_lo = unsafe { block_ptr.read_unaligned() };
      // SAFETY: same 32-byte slice; `add(1)` moves 16 bytes forward, so this
      // reads bytes 16..32, which are in bounds.
      let v_hi = unsafe { block_ptr.add(1).read_unaligned() };

      // Byte sum of all previous blocks, as seen at the start of this block.
      p_v = u32x4_add(p_v, a_v);

      a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_lo));
      let mad = i32x4_dot_i8x16(v_lo, weight_lo_v);
      b_v = u32x4_add(b_v, mad);

      a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_hi));
      let mad = i32x4_dot_i8x16(v_hi, weight_hi_v);
      b_v = u32x4_add(b_v, mad);
    }

    // Multiply the collected prefix sums by BLOCK_SIZE (32 == 1 << 5).
    b_v = u32x4_add(b_v, u32x4_shl(p_v, 5));

    *a += reduce_add(a_v);
    // `b_v` was seeded with the incoming `*b`, hence assignment, not `+=`.
    *b = reduce_add(b_v);

    blocks_remainder
  }

  /// Multiplies the 16 unsigned bytes of `a` and `b` pairwise and folds the
  /// products into four 32-bit lanes (four products per lane).
  ///
  /// Bytes are zero-extended to 16 bits first, so the signed 16-bit dot
  /// product never sees a negative operand.
  #[inline(always)]
  fn i32x4_dot_i8x16(a: v128, b: v128) -> v128 {
    let a_lo = u16x8_extend_low_u8x16(a);
    let a_hi = u16x8_extend_high_u8x16(a);

    let b_lo = u16x8_extend_low_u8x16(b);
    let b_hi = u16x8_extend_high_u8x16(b);

    let lo = i32x4_dot_i16x8(a_lo, b_lo);
    let hi = i32x4_dot_i16x8(a_hi, b_hi);

    i32x4_add(lo, hi)
  }

  /// Sums each group of four adjacent unsigned bytes into one 32-bit lane.
  #[inline(always)]
  fn u32x4_extadd_quarters_u8x16(a: v128) -> v128 {
    u32x4_extadd_pairwise_u16x8(u16x8_extadd_pairwise_u8x16(a))
  }

  /// Horizontal wrapping sum of the four 32-bit lanes of `v`.
  ///
  /// Uses the safe lane-extraction intrinsics, so no `unsafe` transmute and
  /// no `std` path is needed.
  #[inline(always)]
  fn reduce_add(v: v128) -> u32 {
    u32x4_extract_lane::<0>(v)
      .wrapping_add(u32x4_extract_lane::<1>(v))
      .wrapping_add(u32x4_extract_lane::<2>(v))
      .wrapping_add(u32x4_extract_lane::<3>(v))
  }

  /// Weights `32..=17` applied to the first 16 bytes of a block.
  #[inline(always)]
  fn get_weight_lo() -> v128 {
    u8x16(
      32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
    )
  }

  /// Weights `16..=1` applied to the last 16 bytes of a block.
  #[inline(always)]
  fn get_weight_hi() -> v128 {
    u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
  }
}
166 
#[cfg(test)]
mod tests {
  use rand::Rng;

  /// Size of the largest buffer the tests checksum (512 KiB).
  const MAX_LEN: usize = 512 * 1024;

  /// All-zero inputs of assorted lengths, starting with the empty slice.
  #[test]
  fn zeroes() {
    let buf = [0u8; MAX_LEN];
    for len in [0, 1, 2, 100, 1024, MAX_LEN] {
      assert_sum_eq(&buf[..len]);
    }
  }

  /// All-one inputs of assorted lengths, starting with the empty slice.
  #[test]
  fn ones() {
    let buf = [1u8; MAX_LEN];
    for len in [0, 1, 2, 100, 1024, MAX_LEN] {
      assert_sum_eq(&buf[..len]);
    }
  }

  /// Prefixes of one randomly filled buffer.
  #[test]
  fn random() {
    let mut random = [0; MAX_LEN];
    rand::thread_rng().fill(&mut random[..]);

    for len in [1, 100, 1024, MAX_LEN] {
      assert_sum_eq(&random[..len]);
    }
  }

  /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
  #[test]
  fn wiki() {
    assert_sum_eq(b"Wikipedia");
  }

  /// Compares this backend's checksum of `data` with the `adler` crate's.
  ///
  /// Does nothing when no implementation is available for the build target.
  fn assert_sum_eq(data: &[u8]) {
    let update = match super::get_imp() {
      Some(update) => update,
      None => return,
    };

    let (a, b) = update(1, 0, data);
    let actual = (u32::from(b) << 16) | u32::from(a);
    let expected = adler::adler32_slice(data);

    assert_eq!(actual, expected, "len({})", data.len());
  }
}
218