• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2023 Huawei Device Co., Ltd.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 //! [Huffman coding] implementation of the HTTP/2 protocol.
15 //!
16 //! [Huffman Coding]: https://en.wikipedia.org/wiki/Huffman_coding
17 //!
18 //! # What is Huffman coding
19 //! In computer science and information theory, a `Huffman code` is a particular
20 //! type of optimal prefix code that is commonly used for lossless data
21 //! compression. The process of finding or using such a code proceeds by means
22 //! of `Huffman coding`, an algorithm developed by David A. Huffman while he was
23 //! a Sc.D. student at MIT, and published in the 1952 paper "A Method for the
24 //! Construction of Minimum-Redundancy Codes".
25 //!
26 //! # Huffman code in Http/2
27 //! There is a table of Huffman code in `RFC7541`. This [Huffman code] was
28 //! generated from statistics obtained on a large sample of HTTP headers. It is
29 //! a canonical Huffman code with some tweaking to ensure that no symbol has a
30 //! unique code length.
31 //!
32 //! [Huffman Code]: https://www.rfc-editor.org/rfc/rfc7541.html#ref-HUFFMAN
33 
34 // TODO: Introduction of `Huffman code in Http/3`.
35 
36 mod consts;
37 
38 use core::cmp::Ordering;
39 
40 use consts::{HUFFMAN_DECODE, HUFFMAN_ENCODE};
41 
42 /// Converts a string to a Huffman code, and then put it into the
43 /// specified `Vec<u8>`.
huffman_encode(src: &[u8], dst: &mut Vec<u8>)44 pub(crate) fn huffman_encode(src: &[u8], dst: &mut Vec<u8>) {
45     // We use `state` to hold temporary encoding state.
46     // We use `unfilled` to represent the remaining number of bits that is not
47     // filled. Each time any bytes are encoded, we will store the result bits
48     // in `state`.
49     //
50     // When `state` is not full, we add the result bits to `Unfilled`.
51     // `state`:
52     // +----------+----------+----------------------------+
53     // | Result A | Result B |          Unfilled          |
54     // +----------+----------+----------------------------+
55     // |<-------------------  64 bits  ------------------->
56     //
57     // When the length of the result bits is greater than the length of `Unfilled`,
58     // we will truncate it.
59     // `state`:
60     // +---------------------+----------------------------+
61     // |                     |     A part of Result C     | -> Output it.
62     // +---------------------+----------------------------+
63     // |<--------------  full 64 bits  ------------------->
64     //
65     // Final `state`:
66     // +--------------------------------+-----------------+
67     // | The remaining part of Result C |     Unfilled    |
68     // +--------------------------------+-----------------+
69 
70     let mut state = 0u64;
71     // The initial value of `unfilled` is equal to the number of bits in the
72     // `state`.
73     let mut unfilled = 64;
74 
75     for byte in src.iter() {
76         let (nbits, code) = HUFFMAN_ENCODE[*byte as usize];
77         match unfilled.cmp(&nbits) {
78             Ordering::Greater => {
79                 state |= code << (unfilled - nbits);
80                 unfilled -= nbits;
81             }
82             Ordering::Equal => {
83                 state |= code;
84                 dst.extend_from_slice(&state.to_be_bytes());
85                 state = 0;
86                 unfilled = 64;
87             }
88             // We rotate the `code` to the right, and we will get `rotate`.
89             // `rotate`:
90             // +---------+-----------------+----------+
91             // | Parts A |                 |  Parts B |
92             // +---------+-----------------+----------+
93             // `mask`:
94             // +---------+-----------------+----------+
95             // | 000...0 |         111...1            |
96             // +---------+-----------------+----------+
97             // `rotate` & mask => Parts B
98             // `rotate` & !mask => Parts A
99             Ordering::Less => {
100                 let rotate = code.rotate_right((nbits - unfilled) as u32);
101                 let mask = u64::MAX >> (64 - unfilled);
102                 state |= rotate & mask;
103                 dst.extend_from_slice(&state.to_be_bytes());
104                 state = rotate & !mask;
105                 unfilled = 64 - (nbits - unfilled);
106             }
107         }
108     }
109 
110     // At the end of character decoding, if the last byte is not completely
111     // filled, it needs to be filled with `0b1`.
112     if unfilled != 64 {
113         state |= u64::MAX >> (64 - unfilled);
114         let bytes = &state.to_be_bytes();
115         // Here we only need to output the filled bytes, not all the `state`.
116         let len = (8 - (unfilled >> 3)) as usize;
117         dst.extend_from_slice(&bytes.as_slice()[..len]);
118     }
119 }
120 
121 /// Converts a Huffman code into a literal string at one time, and then put it
122 /// into the specified `Vec<u8>`.
123 ///
124 /// The algorithm comes from crate [h2].
125 ///
126 /// [h2]: https://crates.io/crates/h2
huffman_decode(src: &[u8], dst: &mut Vec<u8>) -> Result<(), HuffmanDecodeError>127 pub(crate) fn huffman_decode(src: &[u8], dst: &mut Vec<u8>) -> Result<(), HuffmanDecodeError> {
128     // We use a state machine to parse Huffman code.
129     // We have a `HUFFMAN_DECODE` table, which contains three elements:
130     // `State`, `Decoded Byte`, and `Flags`.
131     //
132     // `State` represents the current state. It can be considered as the value
133     // composed of the remaining unparsed bits after the last parsing is completed.
134     //
135     // `Decoded Byte` represents the decoded character when the `Decoded` bit
136     // of `Flags` is set to `0b1`.
137     //
138     // `Flags` contains three bits: `MAYBE_EOS`(0x1), `DECODED`(0x2), `ERROR`(0x4).
139     //
140     // When `MAYBE_EOS` is set, it means that the current character may be the
141     // end of the literal string.
142     //
143     // When `DECODED` is set, it means that the character at `Decoded Bytes` is
144     // the current decoded character.
145     //
146     // When `ERROR` is set, it means that there is a wrong Huffman code in the
147     // byte sequence.
148     //
149     // We use a variable called `state` to hold the current state. Its initial
150     // value is 0. Every time we take out 4 bits from the byte sequence that
151     // needs to be parsed and look it up in the `HUFFMAN_DECODE` table. Then we
152     // will get `state`, `byte` and `flags`. We can judge the current state
153     // through `flags` and choose to continue decoding or return an error.
154 
155     let (state, flags) = huffman_decode_inner(src, dst, 0, 0)?;
156 
157     // The decoding operation succeeds in the following two cases:
158     // 1. `state` is 0. It means that all bits are parsed normally.
159     // 2. `state` is not 0, but `maybe_eos` is true. It means that not all bits
160     // are parsed and the last byte can be the terminator. The remaining bits are
161     // allowed because a part of the padding is done when http/2 performs
162     // Huffman encoding.
163     if state != 0 && (flags & 0x1) == 0 {
164         return Err(HuffmanDecodeError::InvalidHuffmanCode);
165     }
166 
167     Ok(())
168 }
169 
huffman_decode_inner( src: &[u8], dst: &mut Vec<u8>, state: u8, flags: u8, ) -> Result<(u8, u8), HuffmanDecodeError>170 fn huffman_decode_inner(
171     src: &[u8],
172     dst: &mut Vec<u8>,
173     state: u8,
174     flags: u8,
175 ) -> Result<(u8, u8), HuffmanDecodeError> {
176     let (mut state, mut _result, mut flags) = (state, 0, flags);
177 
178     for byte in src.iter() {
179         let left = byte >> 4;
180         let right = byte & 0xf;
181 
182         (state, _result, flags) = HUFFMAN_DECODE[state as usize][left as usize];
183         if (flags & 0x4) == 0x4 {
184             return Err(HuffmanDecodeError::InvalidHuffmanCode);
185         }
186         if (flags & 0x2) == 0x2 {
187             dst.push(_result);
188         }
189 
190         (state, _result, flags) = HUFFMAN_DECODE[state as usize][right as usize];
191         if (flags & 0x4) == 0x4 {
192             return Err(HuffmanDecodeError::InvalidHuffmanCode);
193         }
194         if (flags & 0x2) == 0x2 {
195             dst.push(_result);
196         }
197     }
198     Ok((state, flags))
199 }
200 
201 /// Converts a Huffman code into a literal string, and then put it into the
202 /// specified `Vec<u8>`. Users can split the string into multiple slices and
203 /// then pass them into `HuffmanDecoder` to get the result.
204 ///
205 /// The algorithm comes from crate [h2].
206 ///
207 /// [h2]: https://crates.io/crates/h2
208 pub(crate) struct HuffmanDecoder {
209     state: u8,
210     flags: u8,
211     vec: Vec<u8>,
212 }
213 
214 impl HuffmanDecoder {
215     /// Creates a new, empty `HuffmanDecoder`.
new() -> Self216     pub(crate) fn new() -> Self {
217         Self {
218             state: 0,
219             flags: 0,
220             vec: Vec::new(),
221         }
222     }
223 
224     /// Decodes input string. Stop when the `src` is used up.
decode(&mut self, src: &[u8]) -> Result<(), HuffmanDecodeError>225     pub(crate) fn decode(&mut self, src: &[u8]) -> Result<(), HuffmanDecodeError> {
226         (self.state, self.flags) =
227             huffman_decode_inner(src, &mut self.vec, self.state, self.flags)?;
228         Ok(())
229     }
230 
231     /// Finishes decoding and get the decoded result.
finish(self) -> Result<Vec<u8>, HuffmanDecodeError>232     pub(crate) fn finish(self) -> Result<Vec<u8>, HuffmanDecodeError> {
233         if self.state != 0 && (self.flags & 0x1) == 0 {
234             return Err(HuffmanDecodeError::InvalidHuffmanCode);
235         }
236         Ok(self.vec)
237     }
238 }
239 
240 /// Possible errors in Huffman decoding operations.
241 #[derive(Debug)]
242 pub(crate) enum HuffmanDecodeError {
243     InvalidHuffmanCode,
244 }
245 
246 #[cfg(test)]
247 mod ut_huffman {
248     use super::{huffman_decode, huffman_encode, HuffmanDecoder};
249     use crate::util::test_util::decode;
250 
251     /// UT test cases for `huffman_encode`.
252     ///
253     /// # Brief
254     /// 1. Calls `huffman_encode` function, passing in the specified parameters.
255     /// 2. Checks if the test results are correct.
256     #[test]
ut_huffman_encode()257     fn ut_huffman_encode() {
258         rfc7541_test_cases();
259 
260         macro_rules! huffman_test_case {
261             ($ctn: expr, $res: expr $(,)?) => {
262                 let mut vec = Vec::new();
263                 huffman_encode($ctn.as_bytes(), &mut vec);
264                 assert_eq!(vec, decode($res).unwrap())
265             };
266         }
267 
268         /// The following test cases are from RFC7541.
269         fn rfc7541_test_cases() {
270             // C.4.1 First Request
271             huffman_test_case!("www.example.com", "f1e3c2e5f23a6ba0ab90f4ff");
272 
273             // C.4.2 Second Request
274             huffman_test_case!("no-cache", "a8eb10649cbf");
275 
276             // C.4.3 Third Request
277             huffman_test_case!("custom-value", "25a849e95bb8e8b4bf");
278 
279             // C.6.1 First Response
280             huffman_test_case!("302", "6402");
281             huffman_test_case!("private", "aec3771a4b");
282             huffman_test_case!(
283                 "Mon, 21 Oct 2013 20:13:21 GMT",
284                 "d07abe941054d444a8200595040b8166e082a62d1bff"
285             );
286             huffman_test_case!(
287                 "https://www.example.com",
288                 "9d29ad171863c78f0b97c8e9ae82ae43d3"
289             );
290 
291             // C.6.2 Second Response
292             huffman_test_case!("307", "640eff");
293 
294             // C.6.3 Third Response
295             huffman_test_case!("gzip", "9bd9ab");
296             huffman_test_case!(
297                 "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1",
298                 "94e7821dd7f2e6c7b335dfdfcd5b3960d5af27087f3672c1ab270fb5291f9587316065c003ed4ee5b1063d5007",
299             );
300         }
301     }
302 
303     /// UT test cases for `huffman_decode`.
304     ///
305     /// # Brief
306     /// 1. Calls `huffman_decode` function, passing in the specified parameters.
307     /// 2. Checks if the test results are correct.
308     #[test]
ut_huffman_decode()309     fn ut_huffman_decode() {
310         rfc7541_test_cases();
311 
312         macro_rules! huffman_test_case {
313             ($ctn: expr, $res: expr $(,)?) => {
314                 let mut vec = Vec::new();
315                 huffman_decode(decode($ctn).unwrap().as_slice(), &mut vec).unwrap();
316                 assert_eq!(vec.as_slice(), $res.as_bytes())
317             };
318         }
319 
320         /// The following test cases are from RFC7541.
321         fn rfc7541_test_cases() {
322             // C.4.1 First Request
323             huffman_test_case!("f1e3c2e5f23a6ba0ab90f4ff", "www.example.com");
324 
325             // C.4.2 Second Request
326             huffman_test_case!("a8eb10649cbf", "no-cache");
327 
328             // C.4.3 Third Request
329             huffman_test_case!("25a849e95bb8e8b4bf", "custom-value");
330 
331             // C.6.1 First Response
332             huffman_test_case!("6402", "302");
333             huffman_test_case!("aec3771a4b", "private");
334             huffman_test_case!(
335                 "d07abe941054d444a8200595040b8166e082a62d1bff",
336                 "Mon, 21 Oct 2013 20:13:21 GMT"
337             );
338             huffman_test_case!(
339                 "9d29ad171863c78f0b97c8e9ae82ae43d3",
340                 "https://www.example.com",
341             );
342 
343             // C.6.2 Second Response
344             huffman_test_case!("640eff", "307");
345 
346             // C.6.3 Third Response
347             huffman_test_case!("9bd9ab", "gzip");
348             huffman_test_case!(
349                 "94e7821dd7f2e6c7b335dfdfcd5b3960d5af27087f3672c1ab270fb5291f9587316065c003ed4ee5b1063d5007",
350                 "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"
351             );
352         }
353     }
354 
355     /// UT test cases for `HuffmanDecoder::decode`.
356     ///
357     /// # Brief
358     /// 1. Creates a `HuffmanDecoder`.
359     /// 1. Calls `decode` and `finish` function, passing in the specified
360     ///    parameters.
361     /// 2. Checks if the test results are correct.
362     #[test]
ut_huffman_decoder()363     fn ut_huffman_decoder() {
364         rfc7541_test_cases();
365         slices_test();
366 
367         macro_rules! huffman_test_case {
368             ($content: expr, $result: expr) => {{
369                 let mut decoder = HuffmanDecoder::new();
370                 for cont in $content.as_slice().iter() {
371                     let bytes = decode(cont).unwrap();
372                     assert!(decoder.decode(&bytes).is_ok());
373                 }
374                 match decoder.finish() {
375                     Ok(vec) => vec == $result.as_bytes(),
376                     _ => panic!(),
377                 }
378             }};
379         }
380 
381         /// The following test cases are from RFC7541.
382         fn rfc7541_test_cases() {
383             // C.4.1 First Request
384             huffman_test_case!(["f1e3c2e5f23a6ba0ab90f4ff"], "www.example.com");
385 
386             // C.4.2 Second Request
387             huffman_test_case!(["a8eb10649cbf"], "no-cache");
388 
389             // C.4.3 Third Request
390             huffman_test_case!(["25a849e95bb8e8b4bf"], "custom-value");
391 
392             // C.6.1 First Response
393             huffman_test_case!(["6402"], "302");
394             huffman_test_case!(["aec3771a4b"], "private");
395             huffman_test_case!(
396                 ["d07abe941054d444a8200595040b8166e082a62d1bff"],
397                 "Mon, 21 Oct 2013 20:13:21 GMT"
398             );
399             huffman_test_case!(
400                 ["9d29ad171863c78f0b97c8e9ae82ae43d3"],
401                 "https://www.example.com"
402             );
403 
404             // C.6.2 Second Response
405             huffman_test_case!(["640eff"], "307");
406 
407             // C.6.3 Third Response
408             huffman_test_case!(["9bd9ab"], "gzip");
409             huffman_test_case!(
410                 ["94e7821dd7f2e6c7b335dfdfcd5b3960d5af27087f3672c1ab270fb5291f9587316065c003ed4ee5b1063d5007"],
411                 "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"
412             );
413         }
414 
415         /// The following test cases is for testing segmented byte slices.
416         fn slices_test() {
417             // Fragmentation
418             huffman_test_case!(["a8", "eb", "10", "64", "9c", "bf"], "no-cache");
419 
420             // Fragmentation + Blank
421             huffman_test_case!(
422                 ["", "", "", "", "a8", "", "eb", "10", "", "64", "9c", "", "bf", "", ""],
423                 "no-cache"
424             );
425         }
426     }
427 }
428