• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #[allow(unused_imports)]
2 use alloc::boxed::Box;
3 
4 /// The Hashtable trait used by the compression to store hashed bytes to their position.
5 /// `val` can be maximum the size of the input in bytes.
6 ///
7 /// `pos` can have a maximum value of u16::MAX or 65535
8 /// If the hashtable is smaller it needs to reduce the pos to its space, e.g. by right
9 /// shifting.
10 ///
11 /// Duplication dictionary size.
12 ///
13 /// Every four bytes is assigned an entry. When this number is lower, fewer entries exists, and
14 /// thus collisions are more likely, hurting the compression ratio.
15 
16 /// hashes and right shifts to a maximum value of 16bit, 65535
17 /// The right shift is done in order to not exceed, the hashtables capacity
18 #[inline]
hash(sequence: u32) -> u3219 fn hash(sequence: u32) -> u32 {
20     (sequence.wrapping_mul(2654435761_u32)) >> 16
21 }
22 
23 /// hashes and right shifts to a maximum value of 16bit, 65535
24 /// The right shift is done in order to not exceed, the hashtables capacity
25 #[cfg(target_pointer_width = "64")]
26 #[inline]
hash5(sequence: usize) -> u3227 fn hash5(sequence: usize) -> u32 {
28     let primebytes = if cfg!(target_endian = "little") {
29         889523592379_usize
30     } else {
31         11400714785074694791_usize
32     };
33     (((sequence << 24).wrapping_mul(primebytes)) >> 48) as u32
34 }
35 
36 pub trait HashTable {
get_at(&self, pos: usize) -> usize37     fn get_at(&self, pos: usize) -> usize;
put_at(&mut self, pos: usize, val: usize)38     fn put_at(&mut self, pos: usize, val: usize);
39     #[allow(dead_code)]
clear(&mut self)40     fn clear(&mut self);
41     #[inline]
42     #[cfg(target_pointer_width = "64")]
get_hash_at(input: &[u8], pos: usize) -> usize43     fn get_hash_at(input: &[u8], pos: usize) -> usize {
44         hash5(super::compress::get_batch_arch(input, pos)) as usize
45     }
46     #[inline]
47     #[cfg(target_pointer_width = "32")]
get_hash_at(input: &[u8], pos: usize) -> usize48     fn get_hash_at(input: &[u8], pos: usize) -> usize {
49         hash(super::compress::get_batch(input, pos)) as usize
50     }
51 }
52 
53 const HASHTABLE_SIZE_4K: usize = 4 * 1024;
54 const HASHTABLE_BIT_SHIFT_4K: usize = 4;
55 
56 #[derive(Debug)]
57 #[repr(align(64))]
58 pub struct HashTable4KU16 {
59     dict: Box<[u16; HASHTABLE_SIZE_4K]>,
60 }
61 impl HashTable4KU16 {
62     #[inline]
new() -> Self63     pub fn new() -> Self {
64         // This generates more efficient assembly in contrast to Box::new(slice), because of an
65         // optmized call alloc_zeroed, vs. alloc + memset
66         // try_into is optimized away
67         let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
68             .into_boxed_slice()
69             .try_into()
70             .unwrap();
71         Self { dict }
72     }
73 }
74 impl HashTable for HashTable4KU16 {
75     #[inline]
get_at(&self, hash: usize) -> usize76     fn get_at(&self, hash: usize) -> usize {
77         self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
78     }
79     #[inline]
put_at(&mut self, hash: usize, val: usize)80     fn put_at(&mut self, hash: usize, val: usize) {
81         self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16;
82     }
83     #[inline]
clear(&mut self)84     fn clear(&mut self) {
85         self.dict.fill(0);
86     }
87     #[inline]
get_hash_at(input: &[u8], pos: usize) -> usize88     fn get_hash_at(input: &[u8], pos: usize) -> usize {
89         hash(super::get_batch(input, pos)) as usize
90     }
91 }
92 
93 #[derive(Debug)]
94 pub struct HashTable4K {
95     dict: Box<[u32; HASHTABLE_SIZE_4K]>,
96 }
97 impl HashTable4K {
98     #[inline]
new() -> Self99     pub fn new() -> Self {
100         let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
101             .into_boxed_slice()
102             .try_into()
103             .unwrap();
104         Self { dict }
105     }
106 
107     #[cold]
108     #[allow(dead_code)]
reposition(&mut self, offset: u32)109     pub fn reposition(&mut self, offset: u32) {
110         for i in self.dict.iter_mut() {
111             *i = i.saturating_sub(offset);
112         }
113     }
114 }
115 impl HashTable for HashTable4K {
116     #[inline]
get_at(&self, hash: usize) -> usize117     fn get_at(&self, hash: usize) -> usize {
118         self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
119     }
120     #[inline]
put_at(&mut self, hash: usize, val: usize)121     fn put_at(&mut self, hash: usize, val: usize) {
122         self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u32;
123     }
124     #[inline]
clear(&mut self)125     fn clear(&mut self) {
126         self.dict.fill(0);
127     }
128 }
129 
130 const HASHTABLE_SIZE_8K: usize = 8 * 1024;
131 const HASH_TABLE_BIT_SHIFT_8K: usize = 3;
132 
133 #[derive(Debug)]
134 pub struct HashTable8K {
135     dict: Box<[u32; HASHTABLE_SIZE_8K]>,
136 }
137 #[allow(dead_code)]
138 impl HashTable8K {
139     #[inline]
new() -> Self140     pub fn new() -> Self {
141         let dict = alloc::vec![0; HASHTABLE_SIZE_8K]
142             .into_boxed_slice()
143             .try_into()
144             .unwrap();
145 
146         Self { dict }
147     }
148 }
149 impl HashTable for HashTable8K {
150     #[inline]
get_at(&self, hash: usize) -> usize151     fn get_at(&self, hash: usize) -> usize {
152         self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize
153     }
154     #[inline]
put_at(&mut self, hash: usize, val: usize)155     fn put_at(&mut self, hash: usize, val: usize) {
156         self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32;
157     }
158     #[inline]
clear(&mut self)159     fn clear(&mut self) {
160         self.dict.fill(0);
161     }
162 }
163