1 // Copyright (c) 2023 Huawei Device Co., Ltd.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 //! [Huffman coding] implementation of the HTTP/2 protocol.
15 //!
16 //! [Huffman Coding]: https://en.wikipedia.org/wiki/Huffman_coding
17 //!
18 //! # What is Huffman coding
19 //! In computer science and information theory, a `Huffman code` is a particular
20 //! type of optimal prefix code that is commonly used for lossless data
21 //! compression. The process of finding or using such a code proceeds by means
22 //! of `Huffman coding`, an algorithm developed by David A. Huffman while he was
23 //! a Sc.D. student at MIT, and published in the 1952 paper "A Method for the
24 //! Construction of Minimum-Redundancy Codes".
25 //!
26 //! # Huffman code in Http/2
27 //! There is a table of Huffman code in `RFC7541`. This [Huffman code] was
28 //! generated from statistics obtained on a large sample of HTTP headers. It is
29 //! a canonical Huffman code with some tweaking to ensure that no symbol has a
30 //! unique code length.
31 //!
32 //! [Huffman Code]: https://www.rfc-editor.org/rfc/rfc7541.html#ref-HUFFMAN
33
34 // TODO: Introduction of `Huffman code in Http/3`.
35
36 mod consts;
37
38 use core::cmp::Ordering;
39
40 use consts::{HUFFMAN_DECODE, HUFFMAN_ENCODE};
41
42 /// Converts a string to a Huffman code, and then put it into the
43 /// specified `Vec<u8>`.
huffman_encode(src: &[u8], dst: &mut Vec<u8>)44 pub(crate) fn huffman_encode(src: &[u8], dst: &mut Vec<u8>) {
45 // We use `state` to hold temporary encoding state.
46 // We use `unfilled` to represent the remaining number of bits that is not
47 // filled. Each time any bytes are encoded, we will store the result bits
48 // in `state`.
49 //
50 // When `state` is not full, we add the result bits to `Unfilled`.
51 // `state`:
52 // +----------+----------+----------------------------+
53 // | Result A | Result B | Unfilled |
54 // +----------+----------+----------------------------+
55 // |<------------------- 64 bits ------------------->
56 //
57 // When the length of the result bits is greater than the length of `Unfilled`,
58 // we will truncate it.
59 // `state`:
60 // +---------------------+----------------------------+
61 // | | A part of Result C | -> Output it.
62 // +---------------------+----------------------------+
63 // |<-------------- full 64 bits ------------------->
64 //
65 // Final `state`:
66 // +--------------------------------+-----------------+
67 // | The remaining part of Result C | Unfilled |
68 // +--------------------------------+-----------------+
69
70 let mut state = 0u64;
71 // The initial value of `unfilled` is equal to the number of bits in the
72 // `state`.
73 let mut unfilled = 64;
74
75 for byte in src.iter() {
76 let (nbits, code) = HUFFMAN_ENCODE[*byte as usize];
77 match unfilled.cmp(&nbits) {
78 Ordering::Greater => {
79 state |= code << (unfilled - nbits);
80 unfilled -= nbits;
81 }
82 Ordering::Equal => {
83 state |= code;
84 dst.extend_from_slice(&state.to_be_bytes());
85 state = 0;
86 unfilled = 64;
87 }
88 // We rotate the `code` to the right, and we will get `rotate`.
89 // `rotate`:
90 // +---------+-----------------+----------+
91 // | Parts A | | Parts B |
92 // +---------+-----------------+----------+
93 // `mask`:
94 // +---------+-----------------+----------+
95 // | 000...0 | 111...1 |
96 // +---------+-----------------+----------+
97 // `rotate` & mask => Parts B
98 // `rotate` & !mask => Parts A
99 Ordering::Less => {
100 let rotate = code.rotate_right((nbits - unfilled) as u32);
101 let mask = u64::MAX >> (64 - unfilled);
102 state |= rotate & mask;
103 dst.extend_from_slice(&state.to_be_bytes());
104 state = rotate & !mask;
105 unfilled = 64 - (nbits - unfilled);
106 }
107 }
108 }
109
110 // At the end of character decoding, if the last byte is not completely
111 // filled, it needs to be filled with `0b1`.
112 if unfilled != 64 {
113 state |= u64::MAX >> (64 - unfilled);
114 let bytes = &state.to_be_bytes();
115 // Here we only need to output the filled bytes, not all the `state`.
116 let len = (8 - (unfilled >> 3)) as usize;
117 dst.extend_from_slice(&bytes.as_slice()[..len]);
118 }
119 }
120
121 /// Converts a Huffman code into a literal string at one time, and then put it
122 /// into the specified `Vec<u8>`.
123 ///
124 /// The algorithm comes from crate [h2].
125 ///
126 /// [h2]: https://crates.io/crates/h2
huffman_decode(src: &[u8], dst: &mut Vec<u8>) -> Result<(), HuffmanDecodeError>127 pub(crate) fn huffman_decode(src: &[u8], dst: &mut Vec<u8>) -> Result<(), HuffmanDecodeError> {
128 // We use a state machine to parse Huffman code.
129 // We have a `HUFFMAN_DECODE` table, which contains three elements:
130 // `State`, `Decoded Byte`, and `Flags`.
131 //
132 // `State` represents the current state. It can be considered as the value
133 // composed of the remaining unparsed bits after the last parsing is completed.
134 //
135 // `Decoded Byte` represents the decoded character when the `Decoded` bit
136 // of `Flags` is set to `0b1`.
137 //
138 // `Flags` contains three bits: `MAYBE_EOS`(0x1), `DECODED`(0x2), `ERROR`(0x4).
139 //
140 // When `MAYBE_EOS` is set, it means that the current character may be the
141 // end of the literal string.
142 //
143 // When `DECODED` is set, it means that the character at `Decoded Bytes` is
144 // the current decoded character.
145 //
146 // When `ERROR` is set, it means that there is a wrong Huffman code in the
147 // byte sequence.
148 //
149 // We use a variable called `state` to hold the current state. Its initial
150 // value is 0. Every time we take out 4 bits from the byte sequence that
151 // needs to be parsed and look it up in the `HUFFMAN_DECODE` table. Then we
152 // will get `state`, `byte` and `flags`. We can judge the current state
153 // through `flags` and choose to continue decoding or return an error.
154
155 let (state, flags) = huffman_decode_inner(src, dst, 0, 0)?;
156
157 // The decoding operation succeeds in the following two cases:
158 // 1. `state` is 0. It means that all bits are parsed normally.
159 // 2. `state` is not 0, but `maybe_eos` is true. It means that not all bits
160 // are parsed and the last byte can be the terminator. The remaining bits are
161 // allowed because a part of the padding is done when http/2 performs
162 // Huffman encoding.
163 if state != 0 && (flags & 0x1) == 0 {
164 return Err(HuffmanDecodeError::InvalidHuffmanCode);
165 }
166
167 Ok(())
168 }
169
huffman_decode_inner( src: &[u8], dst: &mut Vec<u8>, state: u8, flags: u8, ) -> Result<(u8, u8), HuffmanDecodeError>170 fn huffman_decode_inner(
171 src: &[u8],
172 dst: &mut Vec<u8>,
173 state: u8,
174 flags: u8,
175 ) -> Result<(u8, u8), HuffmanDecodeError> {
176 let (mut state, mut _result, mut flags) = (state, 0, flags);
177
178 for byte in src.iter() {
179 let left = byte >> 4;
180 let right = byte & 0xf;
181
182 (state, _result, flags) = HUFFMAN_DECODE[state as usize][left as usize];
183 if (flags & 0x4) == 0x4 {
184 return Err(HuffmanDecodeError::InvalidHuffmanCode);
185 }
186 if (flags & 0x2) == 0x2 {
187 dst.push(_result);
188 }
189
190 (state, _result, flags) = HUFFMAN_DECODE[state as usize][right as usize];
191 if (flags & 0x4) == 0x4 {
192 return Err(HuffmanDecodeError::InvalidHuffmanCode);
193 }
194 if (flags & 0x2) == 0x2 {
195 dst.push(_result);
196 }
197 }
198 Ok((state, flags))
199 }
200
201 /// Converts a Huffman code into a literal string, and then put it into the
202 /// specified `Vec<u8>`. Users can split the string into multiple slices and
203 /// then pass them into `HuffmanDecoder` to get the result.
204 ///
205 /// The algorithm comes from crate [h2].
206 ///
207 /// [h2]: https://crates.io/crates/h2
208 pub(crate) struct HuffmanDecoder {
209 state: u8,
210 flags: u8,
211 vec: Vec<u8>,
212 }
213
214 impl HuffmanDecoder {
215 /// Creates a new, empty `HuffmanDecoder`.
new() -> Self216 pub(crate) fn new() -> Self {
217 Self {
218 state: 0,
219 flags: 0,
220 vec: Vec::new(),
221 }
222 }
223
224 /// Decodes input string. Stop when the `src` is used up.
decode(&mut self, src: &[u8]) -> Result<(), HuffmanDecodeError>225 pub(crate) fn decode(&mut self, src: &[u8]) -> Result<(), HuffmanDecodeError> {
226 (self.state, self.flags) =
227 huffman_decode_inner(src, &mut self.vec, self.state, self.flags)?;
228 Ok(())
229 }
230
231 /// Finishes decoding and get the decoded result.
finish(self) -> Result<Vec<u8>, HuffmanDecodeError>232 pub(crate) fn finish(self) -> Result<Vec<u8>, HuffmanDecodeError> {
233 if self.state != 0 && (self.flags & 0x1) == 0 {
234 return Err(HuffmanDecodeError::InvalidHuffmanCode);
235 }
236 Ok(self.vec)
237 }
238 }
239
240 /// Possible errors in Huffman decoding operations.
241 #[derive(Debug)]
242 pub(crate) enum HuffmanDecodeError {
243 InvalidHuffmanCode,
244 }
245
246 #[cfg(test)]
247 mod ut_huffman {
248 use super::{huffman_decode, huffman_encode, HuffmanDecoder};
249 use crate::util::test_util::decode;
250
251 /// UT test cases for `huffman_encode`.
252 ///
253 /// # Brief
254 /// 1. Calls `huffman_encode` function, passing in the specified parameters.
255 /// 2. Checks if the test results are correct.
256 #[test]
ut_huffman_encode()257 fn ut_huffman_encode() {
258 rfc7541_test_cases();
259
260 macro_rules! huffman_test_case {
261 ($ctn: expr, $res: expr $(,)?) => {
262 let mut vec = Vec::new();
263 huffman_encode($ctn.as_bytes(), &mut vec);
264 assert_eq!(vec, decode($res).unwrap())
265 };
266 }
267
268 /// The following test cases are from RFC7541.
269 fn rfc7541_test_cases() {
270 // C.4.1 First Request
271 huffman_test_case!("www.example.com", "f1e3c2e5f23a6ba0ab90f4ff");
272
273 // C.4.2 Second Request
274 huffman_test_case!("no-cache", "a8eb10649cbf");
275
276 // C.4.3 Third Request
277 huffman_test_case!("custom-value", "25a849e95bb8e8b4bf");
278
279 // C.6.1 First Response
280 huffman_test_case!("302", "6402");
281 huffman_test_case!("private", "aec3771a4b");
282 huffman_test_case!(
283 "Mon, 21 Oct 2013 20:13:21 GMT",
284 "d07abe941054d444a8200595040b8166e082a62d1bff"
285 );
286 huffman_test_case!(
287 "https://www.example.com",
288 "9d29ad171863c78f0b97c8e9ae82ae43d3"
289 );
290
291 // C.6.2 Second Response
292 huffman_test_case!("307", "640eff");
293
294 // C.6.3 Third Response
295 huffman_test_case!("gzip", "9bd9ab");
296 huffman_test_case!(
297 "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1",
298 "94e7821dd7f2e6c7b335dfdfcd5b3960d5af27087f3672c1ab270fb5291f9587316065c003ed4ee5b1063d5007",
299 );
300 }
301 }
302
303 /// UT test cases for `huffman_decode`.
304 ///
305 /// # Brief
306 /// 1. Calls `huffman_decode` function, passing in the specified parameters.
307 /// 2. Checks if the test results are correct.
308 #[test]
ut_huffman_decode()309 fn ut_huffman_decode() {
310 rfc7541_test_cases();
311
312 macro_rules! huffman_test_case {
313 ($ctn: expr, $res: expr $(,)?) => {
314 let mut vec = Vec::new();
315 huffman_decode(decode($ctn).unwrap().as_slice(), &mut vec).unwrap();
316 assert_eq!(vec.as_slice(), $res.as_bytes())
317 };
318 }
319
320 /// The following test cases are from RFC7541.
321 fn rfc7541_test_cases() {
322 // C.4.1 First Request
323 huffman_test_case!("f1e3c2e5f23a6ba0ab90f4ff", "www.example.com");
324
325 // C.4.2 Second Request
326 huffman_test_case!("a8eb10649cbf", "no-cache");
327
328 // C.4.3 Third Request
329 huffman_test_case!("25a849e95bb8e8b4bf", "custom-value");
330
331 // C.6.1 First Response
332 huffman_test_case!("6402", "302");
333 huffman_test_case!("aec3771a4b", "private");
334 huffman_test_case!(
335 "d07abe941054d444a8200595040b8166e082a62d1bff",
336 "Mon, 21 Oct 2013 20:13:21 GMT"
337 );
338 huffman_test_case!(
339 "9d29ad171863c78f0b97c8e9ae82ae43d3",
340 "https://www.example.com",
341 );
342
343 // C.6.2 Second Response
344 huffman_test_case!("640eff", "307");
345
346 // C.6.3 Third Response
347 huffman_test_case!("9bd9ab", "gzip");
348 huffman_test_case!(
349 "94e7821dd7f2e6c7b335dfdfcd5b3960d5af27087f3672c1ab270fb5291f9587316065c003ed4ee5b1063d5007",
350 "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"
351 );
352 }
353 }
354
355 /// UT test cases for `HuffmanDecoder::decode`.
356 ///
357 /// # Brief
358 /// 1. Creates a `HuffmanDecoder`.
359 /// 1. Calls `decode` and `finish` function, passing in the specified
360 /// parameters.
361 /// 2. Checks if the test results are correct.
362 #[test]
ut_huffman_decoder()363 fn ut_huffman_decoder() {
364 rfc7541_test_cases();
365 slices_test();
366
367 macro_rules! huffman_test_case {
368 ($content: expr, $result: expr) => {{
369 let mut decoder = HuffmanDecoder::new();
370 for cont in $content.as_slice().iter() {
371 let bytes = decode(cont).unwrap();
372 assert!(decoder.decode(&bytes).is_ok());
373 }
374 match decoder.finish() {
375 Ok(vec) => vec == $result.as_bytes(),
376 _ => panic!(),
377 }
378 }};
379 }
380
381 /// The following test cases are from RFC7541.
382 fn rfc7541_test_cases() {
383 // C.4.1 First Request
384 huffman_test_case!(["f1e3c2e5f23a6ba0ab90f4ff"], "www.example.com");
385
386 // C.4.2 Second Request
387 huffman_test_case!(["a8eb10649cbf"], "no-cache");
388
389 // C.4.3 Third Request
390 huffman_test_case!(["25a849e95bb8e8b4bf"], "custom-value");
391
392 // C.6.1 First Response
393 huffman_test_case!(["6402"], "302");
394 huffman_test_case!(["aec3771a4b"], "private");
395 huffman_test_case!(
396 ["d07abe941054d444a8200595040b8166e082a62d1bff"],
397 "Mon, 21 Oct 2013 20:13:21 GMT"
398 );
399 huffman_test_case!(
400 ["9d29ad171863c78f0b97c8e9ae82ae43d3"],
401 "https://www.example.com"
402 );
403
404 // C.6.2 Second Response
405 huffman_test_case!(["640eff"], "307");
406
407 // C.6.3 Third Response
408 huffman_test_case!(["9bd9ab"], "gzip");
409 huffman_test_case!(
410 ["94e7821dd7f2e6c7b335dfdfcd5b3960d5af27087f3672c1ab270fb5291f9587316065c003ed4ee5b1063d5007"],
411 "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"
412 );
413 }
414
415 /// The following test cases is for testing segmented byte slices.
416 fn slices_test() {
417 // Fragmentation
418 huffman_test_case!(["a8", "eb", "10", "64", "9c", "bf"], "no-cache");
419
420 // Fragmentation + Blank
421 huffman_test_case!(
422 ["", "", "", "", "a8", "", "eb", "10", "", "64", "9c", "", "bf", "", ""],
423 "no-cache"
424 );
425 }
426 }
427 }
428