1// Copyright 2019 The Wuffs Authors. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package flatecut 16 17import ( 18 "bytes" 19 "compress/flate" 20 "io" 21 "io/ioutil" 22 "testing" 23 24 "github.com/google/wuffs/internal/testcut" 25) 26 27func TestCut(t *testing.T) { 28 testcut.Test(t, SmallestValidMaxEncodedLen, Cut, newReader, []string{ 29 "artificial/deflate-backref-crosses-blocks.deflate", 30 "artificial/deflate-distance-32768.deflate", 31 "romeo.txt.deflate", 32 "romeo.txt.fixed-huff.deflate", 33 }) 34} 35 36func BenchmarkCut(b *testing.B) { 37 testcut.Benchmark(b, SmallestValidMaxEncodedLen, Cut, newReader, 38 "pi.txt.zlib", 2, 4, 100003) 39} 40 41func newReader(r io.Reader) (io.ReadCloser, error) { 42 return flate.NewReader(r), nil 43} 44 45func decodeFlate(src []byte) string { 46 dst, err := ioutil.ReadAll(flate.NewReader(bytes.NewReader(src))) 47 if err != nil { 48 return err.Error() 49 } 50 return string(dst) 51} 52 53func TestHuffmanDecode(t *testing.T) { 54 // This exercises the "ABCDEFGH" example from RFC 1951 section 3.2.2, 55 // discussed in the "type huffman" doc comment. 56 57 const src = "HEADFACE" 58 codes := []string{ 59 'A': " 010", 60 'B': " 011", 61 'C': " 100", 62 'D': " 101", 63 'E': " 110", 64 'F': " 00", 65 'G': "1110", 66 'H': "1111", 67 } 68 69 encoded := []byte(nil) 70 encBits := uint32(0) 71 encNBits := uint32(0) 72 for _, s := range src { 73 for _, c := range codes[s] { 74 if c == ' ' { 75 continue 76 } 77 78 if c == '1' { 79 encBits |= 1 << encNBits 80 } 81 encNBits++ 82 83 if encNBits == 8 { 84 encoded = append(encoded, uint8(encBits)) 85 encBits = 0 86 encNBits = 0 87 } 88 } 89 } 90 if encNBits > 0 { 91 encoded = append(encoded, uint8(encBits)) 92 } 93 94 h := huffman{ 95 counts: [maxCodeBits + 1]uint32{ 96 2: 1, 3: 5, 4: 2, 97 }, 98 symbols: [maxNumCodes]int32{ 99 0: 'F', 1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E', 6: 'G', 7: 'H', 100 }, 101 } 102 h.constructLookUpTable() 103 104 b := &bitstream{ 105 bytes: encoded, 106 } 107 108 decoded := []byte(nil) 109 for _ = range src { 110 c := h.decode(b) 111 if c < 0 { 112 decoded = append(decoded, '!') 113 break 114 } 115 decoded = append(decoded, uint8(c)) 116 } 117 118 if got, want := string(decoded), src; got != want { 119 t.Fatalf("got %q, want %q", got, want) 120 } 121} 122 123// A DEFLATE encoding of the first 64 bytes of the AUTHORS file in the 124// repository root directory. 125// 126// The encoding uses a Dynamic Huffman block, but one whose H-D Huffman 127// distance tree is degenerate (there's a mapping for the "0" code but no 128// mapping for the "1" code) and unused. 129// 130// The first 25-30 bytes contain the encoded H-L and H-D Huffman trees. The 131// last 30-35 bytes contain the actual payload, encoded with H-L. 132// 133// The high 7 bits of the final byte is unused padding. 134const ( 135 degenerateHuffmanDec = "# This is the official list of Wuffs authors for copyright purpo" 136 degenerateHuffmanEnc = "" + 137 "\x05\xc0\xc1\x09\xc5\x30\x0c\x03\xd0\x55\x04\x7f\xa5\x0f\x3d\x87" + 138 "\x50\xd5\x82\x80\x82\xed\x1c\xba\x7d\xdf\x0f\xff\x50\x41\x85\x8e" + 139 "\x1b\x26\x35\x35\x16\x96\xaa\x61\xe2\x3a\x64\x61\x9c\x0e\x67\x81" + 140 "\x4e\x4c\xef\x37\xf5\x44\x63\x9f\xdc\xfe\x00" 141) 142 143func TestReplaceHuffmanWithStored(t *testing.T) { 144 const dec = degenerateHuffmanDec 145 const enc = degenerateHuffmanEnc 146 if (len(dec) != 64) || (len(enc) != 59) { 147 panic("inconsistent const string lengths") 148 } 149 if got, want := decodeFlate([]byte(enc)), dec; got != want { 150 t.Fatalf("before Cut: got %q, want %q", got, want) 151 } 152 153 for i := 4; i <= 59; i += 5 { 154 b := []byte(enc) 155 encLen, decLen, err := Cut(nil, b, i) 156 if err != nil { 157 t.Errorf("i=%d: %v", i, err) 158 continue 159 } 160 if encLen < 1 { 161 t.Errorf("i=%d: encLen: got %d, want >= 1", i, encLen) 162 continue 163 } else if encLen > len(enc) { 164 t.Errorf("i=%d: encLen: got %d, want <= %d", i, encLen, len(enc)) 165 continue 166 } 167 // If we can make some progress (decLen > 0), even if the input uses a 168 // Huffman block, one option is to re-encode to a single Stored block 169 // (for 5 bytes of overhead). It's single because len(dec) < 0xFFFF. 170 // Regardless of whether the cut form uses a Huffman or Stored block, 171 // we should be able to produce at least i-5 bytes of decoded output. 172 if (decLen > 0) && (i > 5) && (decLen < i-5) { 173 t.Errorf("i=%d: decLen: got %d, want >= %d", i, decLen, i-5) 174 continue 175 } else if decLen > len(dec) { 176 t.Errorf("i=%d: decLen: got %d, want <= %d", i, decLen, len(dec)) 177 continue 178 } 179 180 if got, want := decodeFlate(b[:encLen]), dec[:decLen]; got != want { 181 t.Errorf("i=%d: after Cut: got %q, want %q", i, got, want) 182 continue 183 } 184 185 // Check that we are using a space-efficient block type. 186 { 187 got := (b[0] >> 1) & 3 188 want := uint8(0xFF) 189 190 switch i { 191 case 4: 192 want = 1 // Static Huffman, for a decLen of 0. 193 case 9: 194 want = 0 // Stored. 195 case 59: 196 want = 2 // Dynamic Huffman. 197 default: 198 continue 199 } 200 201 if got != want { 202 t.Errorf("i=%d: block type: got %d, want %d", i, got, want) 203 } 204 } 205 } 206} 207 208func TestDegenerateHuffmanUnused(t *testing.T) { 209 const dec = degenerateHuffmanDec 210 const enc = degenerateHuffmanEnc 211 212 // Cutting 1 byte off the end of the encoded form will lead to cutting n 213 // bytes off the decoded form. Coincidentally, n equals 1, even though each 214 // decoded byte (8 bits) is packed into smaller number of bits, as most of 215 // the final encoded byte's bits are unused padding. 216 const n = 1 217 218 b := []byte(enc) 219 encLen, decLen, err := Cut(nil, b, len(enc)-1) 220 if err != nil { 221 t.Fatalf("Cut: %v", err) 222 } else if encLen != len(enc)-1 { 223 t.Fatalf("encLen: got %d, want %d", encLen, len(enc)-1) 224 } else if decLen != len(dec)-n { 225 t.Fatalf("decLen: got %d, want %d", decLen, len(dec)-n) 226 } 227 228 if got, want := decodeFlate(b[:encLen]), dec[:decLen]; got != want { 229 t.Fatalf("after Cut: got %q, want %q", got, want) 230 } 231} 232