• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2019 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package flatecut
16
17import (
18	"bytes"
19	"compress/flate"
20	"io"
21	"io/ioutil"
22	"testing"
23
24	"github.com/google/wuffs/internal/testcut"
25)
26
27func TestCut(t *testing.T) {
28	testcut.Test(t, SmallestValidMaxEncodedLen, Cut, newReader, []string{
29		"artificial/deflate-backref-crosses-blocks.deflate",
30		"artificial/deflate-distance-32768.deflate",
31		"romeo.txt.deflate",
32		"romeo.txt.fixed-huff.deflate",
33	})
34}
35
36func BenchmarkCut(b *testing.B) {
37	testcut.Benchmark(b, SmallestValidMaxEncodedLen, Cut, newReader,
38		"pi.txt.zlib", 2, 4, 100003)
39}
40
41func newReader(r io.Reader) (io.ReadCloser, error) {
42	return flate.NewReader(r), nil
43}
44
45func decodeFlate(src []byte) string {
46	dst, err := ioutil.ReadAll(flate.NewReader(bytes.NewReader(src)))
47	if err != nil {
48		return err.Error()
49	}
50	return string(dst)
51}
52
53func TestHuffmanDecode(t *testing.T) {
54	// This exercises the "ABCDEFGH" example from RFC 1951 section 3.2.2,
55	// discussed in the "type huffman" doc comment.
56
57	const src = "HEADFACE"
58	codes := []string{
59		'A': " 010",
60		'B': " 011",
61		'C': " 100",
62		'D': " 101",
63		'E': " 110",
64		'F': "  00",
65		'G': "1110",
66		'H': "1111",
67	}
68
69	encoded := []byte(nil)
70	encBits := uint32(0)
71	encNBits := uint32(0)
72	for _, s := range src {
73		for _, c := range codes[s] {
74			if c == ' ' {
75				continue
76			}
77
78			if c == '1' {
79				encBits |= 1 << encNBits
80			}
81			encNBits++
82
83			if encNBits == 8 {
84				encoded = append(encoded, uint8(encBits))
85				encBits = 0
86				encNBits = 0
87			}
88		}
89	}
90	if encNBits > 0 {
91		encoded = append(encoded, uint8(encBits))
92	}
93
94	h := huffman{
95		counts: [maxCodeBits + 1]uint32{
96			2: 1, 3: 5, 4: 2,
97		},
98		symbols: [maxNumCodes]int32{
99			0: 'F', 1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E', 6: 'G', 7: 'H',
100		},
101	}
102	h.constructLookUpTable()
103
104	b := &bitstream{
105		bytes: encoded,
106	}
107
108	decoded := []byte(nil)
109	for _ = range src {
110		c := h.decode(b)
111		if c < 0 {
112			decoded = append(decoded, '!')
113			break
114		}
115		decoded = append(decoded, uint8(c))
116	}
117
118	if got, want := string(decoded), src; got != want {
119		t.Fatalf("got %q, want %q", got, want)
120	}
121}
122
123// A DEFLATE encoding of the first 64 bytes of the AUTHORS file in the
124// repository root directory.
125//
126// The encoding uses a Dynamic Huffman block, but one whose H-D Huffman
127// distance tree is degenerate (there's a mapping for the "0" code but no
128// mapping for the "1" code) and unused.
129//
130// The first 25-30 bytes contain the encoded H-L and H-D Huffman trees. The
131// last 30-35 bytes contain the actual payload, encoded with H-L.
132//
133// The high 7 bits of the final byte is unused padding.
134const (
135	degenerateHuffmanDec = "# This is the official list of Wuffs authors for copyright purpo"
136	degenerateHuffmanEnc = "" +
137		"\x05\xc0\xc1\x09\xc5\x30\x0c\x03\xd0\x55\x04\x7f\xa5\x0f\x3d\x87" +
138		"\x50\xd5\x82\x80\x82\xed\x1c\xba\x7d\xdf\x0f\xff\x50\x41\x85\x8e" +
139		"\x1b\x26\x35\x35\x16\x96\xaa\x61\xe2\x3a\x64\x61\x9c\x0e\x67\x81" +
140		"\x4e\x4c\xef\x37\xf5\x44\x63\x9f\xdc\xfe\x00"
141)
142
143func TestReplaceHuffmanWithStored(t *testing.T) {
144	const dec = degenerateHuffmanDec
145	const enc = degenerateHuffmanEnc
146	if (len(dec) != 64) || (len(enc) != 59) {
147		panic("inconsistent const string lengths")
148	}
149	if got, want := decodeFlate([]byte(enc)), dec; got != want {
150		t.Fatalf("before Cut: got %q, want %q", got, want)
151	}
152
153	for i := 4; i <= 59; i += 5 {
154		b := []byte(enc)
155		encLen, decLen, err := Cut(nil, b, i)
156		if err != nil {
157			t.Errorf("i=%d: %v", i, err)
158			continue
159		}
160		if encLen < 1 {
161			t.Errorf("i=%d: encLen: got %d, want >= 1", i, encLen)
162			continue
163		} else if encLen > len(enc) {
164			t.Errorf("i=%d: encLen: got %d, want <= %d", i, encLen, len(enc))
165			continue
166		}
167		// If we can make some progress (decLen > 0), even if the input uses a
168		// Huffman block, one option is to re-encode to a single Stored block
169		// (for 5 bytes of overhead). It's single because len(dec) < 0xFFFF.
170		// Regardless of whether the cut form uses a Huffman or Stored block,
171		// we should be able to produce at least i-5 bytes of decoded output.
172		if (decLen > 0) && (i > 5) && (decLen < i-5) {
173			t.Errorf("i=%d: decLen: got %d, want >= %d", i, decLen, i-5)
174			continue
175		} else if decLen > len(dec) {
176			t.Errorf("i=%d: decLen: got %d, want <= %d", i, decLen, len(dec))
177			continue
178		}
179
180		if got, want := decodeFlate(b[:encLen]), dec[:decLen]; got != want {
181			t.Errorf("i=%d: after Cut: got %q, want %q", i, got, want)
182			continue
183		}
184
185		// Check that we are using a space-efficient block type.
186		{
187			got := (b[0] >> 1) & 3
188			want := uint8(0xFF)
189
190			switch i {
191			case 4:
192				want = 1 // Static Huffman, for a decLen of 0.
193			case 9:
194				want = 0 // Stored.
195			case 59:
196				want = 2 // Dynamic Huffman.
197			default:
198				continue
199			}
200
201			if got != want {
202				t.Errorf("i=%d: block type: got %d, want %d", i, got, want)
203			}
204		}
205	}
206}
207
208func TestDegenerateHuffmanUnused(t *testing.T) {
209	const dec = degenerateHuffmanDec
210	const enc = degenerateHuffmanEnc
211
212	// Cutting 1 byte off the end of the encoded form will lead to cutting n
213	// bytes off the decoded form. Coincidentally, n equals 1, even though each
214	// decoded byte (8 bits) is packed into smaller number of bits, as most of
215	// the final encoded byte's bits are unused padding.
216	const n = 1
217
218	b := []byte(enc)
219	encLen, decLen, err := Cut(nil, b, len(enc)-1)
220	if err != nil {
221		t.Fatalf("Cut: %v", err)
222	} else if encLen != len(enc)-1 {
223		t.Fatalf("encLen: got %d, want %d", encLen, len(enc)-1)
224	} else if decLen != len(dec)-n {
225		t.Fatalf("decLen: got %d, want %d", decLen, len(dec)-n)
226	}
227
228	if got, want := decodeFlate(b[:encLen]), dec[:decLen]; got != want {
229		t.Fatalf("after Cut: got %q, want %q", got, want)
230	}
231}
232