• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2018 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// +build ignore
16
17package main
18
19// print-lzw-example.go prints a worked example discussed in std/lzw/README.md,
20// based on three implementations of a simplified core of the LZW algorithm.
21// Simplifications include assuming LSB-first, 8 bit literals, no clear codes,
22// no full tables and that the input is given in terms of a code stream, not a
23// bit stream.
24//
25// The implementations are designed for ease of studying, and for e.g. a
26// minimal diff between the suf1 and sufQ implementations, and are not
27// optimized for performance.
28//
29// Usage: go run print-lzw-example.go
30//
31// You can also play with the code (e.g. modify Q and re-run) online:
32// https://play.golang.org/p/1aLC_XHZzoA
33
34import (
35	"fmt"
36)
37
38const (
39	expectedOutput = "TOBEORNOTTOBEORTOBEORNOTXOTXOTXOOTXOOOTXOOOTOBEY"
40
41	clearCode = 0x100
42	endCode   = 0x101
43
44	// Neither clearCode or endCode can ever be a valid prefix code. We
45	// arbitrarily pick clearCode to represent "no prefix".
46	noPrefix = clearCode
47
48	// Q is the maximum possible suffix length, in bytes, which equals 1 plus
49	// the maximum possible "suffix length minus 1". Q == 1 means that sufQ is
50	// equivalent to suf1.
51	//
52	// In std/lzw/README.md, Q is 3 to keep the example short and simple. In
53	// practice, especially on modern CPUs that can do unaligned 32 or 64 bit
54	// loads and stores, Q is 4 or 8.
55	Q = 3
56)
57
58var codes = []int{
59	'T', 'O', 'B', 'E', 'O', 'R', 'N', 'O', 'T',
60	0x102, 0x104, 0x106, 0x10B, 0x105, 0x107, 0x109,
61	'X',
62	0x111, 0x113, 0x114, 0x115, 0x10E,
63	'Y',
64	0x101,
65}
66
67func codeString(code int) string {
68	if code < clearCode {
69		return fmt.Sprintf("%5c", code)
70	} else if code == noPrefix {
71		return "    -"
72	}
73	return fmt.Sprintf("0x%03X", code)
74}
75
76func main() {
77	n, s, q := new(naive), new(suf1), new(sufQ)
78	decode(n)
79	decode(s)
80	decode(q)
81
82	key := endCode
83	fmt.Printf(" Code   Emits    Key    Value   Pre1+Suf1  LM1  /Q  %%Q   PreQ+SufQ\n")
84	for _, code := range codes {
85		// Code
86		fmt.Printf("%s", codeString(code))
87
88		// Emits
89		if code == endCode {
90			fmt.Printf("   -end-\n")
91			break
92		}
93		fmt.Printf("  %6s", n[code])
94
95		if key != endCode {
96			fmt.Printf("  0x%03X  %7s  %s %c     %3d %3d %3d  %s %s",
97				key,                              // Key
98				n[key],                           // Value
99				codeString(int(s.prefixes[key])), // Pre1
100				s.suffixes[key],                  // Suf1
101				q.lengthM1s[key],                 // LM1
102				q.lengthM1s[key]/Q,               // /Q
103				q.lengthM1s[key]%Q,               // %Q
104				codeString(int(q.prefixes[key])), // PreQ
105				string(q.suffixes[key][:]),       // SufQ
106			)
107		}
108
109		fmt.Println()
110		key++
111	}
112}
113
114type implementation interface {
115	initialize()
116	emit(buffer []byte, code int, prevCode int, n int) []byte
117}
118
119func decode(impl implementation) {
120	impl.initialize()
121
122	output, prevCode, n := []byte(nil), -1, 0x101
123	for _, code := range codes {
124		if code == clearCode {
125			panic("unimplemented clearCode")
126		} else if code == endCode {
127			if string(output) != expectedOutput {
128				panic("unexpected output")
129			}
130			return
131		} else if code > n {
132			panic("invalid code")
133		}
134
135		// We have a literal or copy code.
136		output = impl.emit(output, code, prevCode, n)
137
138		prevCode, n = code, n+1
139		if n >= 4096 {
140			panic("unimplemented table-is-full")
141		}
142	}
143	panic("missing endCode")
144}
145
146// naive is a simple implementation that, in the worst case, requires O(N*N)
147// memory.
148type naive [4096]string
149
150func (t *naive) initialize() {
151	for i := 0; i < clearCode; i++ {
152		t[i] = string([]byte{byte(i)})
153	}
154}
155
156func (t *naive) emit(output []byte, code int, prevCode int, n int) []byte {
157	if prevCode >= 0 {
158		prevOutput := t[prevCode]
159		if code < n {
160			t[n] = prevOutput + t[code][:1]
161		} else {
162			t[n] = prevOutput + prevOutput[:1]
163		}
164	}
165	return append(output, t[code]...)
166}
167
168// suf1 keeps separate prefix and suffix tables, 1 byte per suffix.
169type suf1 struct {
170	prefixes [4096]uint16
171	suffixes [4096]byte
172}
173
174func (t *suf1) initialize() {
175	for i := 0; i < clearCode; i++ {
176		t.prefixes[i] = noPrefix
177		t.suffixes[i] = byte(i)
178	}
179}
180
181func (t *suf1) emit(output []byte, code int, prevCode int, n int) []byte {
182	// Fill an intermediate buffer from back to front, 1 byte at a time.
183	buffer := [4096]byte{}
184	i := len(buffer)
185
186	c := code
187	if code == n {
188		c = prevCode
189		i--
190		// buffer[4095] will be set later.
191	}
192
193	firstByte := byte(0)
194	for {
195		suffix := t.suffixes[c]
196		i--
197		buffer[i] = suffix
198		c = int(t.prefixes[c])
199		if c == noPrefix {
200			firstByte = suffix
201			break
202		}
203	}
204
205	if code == n {
206		buffer[4095] = firstByte
207	}
208
209	// The "if prevCode >= 0" guard, and initializing prevCode to -1 instead of
210	// 0, is correct conceptually, but unnecessary in practice. Look for "fake
211	// key-value pair" in std/lzw/README.md.
212	if prevCode >= 0 {
213		t.prefixes[n] = uint16(prevCode)
214		t.suffixes[n] = firstByte
215	}
216
217	return append(output, buffer[i:4096]...)
218}
219
220// sufQ keeps separate prefix and suffix tables, up to Q bytes per suffix.
221type sufQ struct {
222	lengthM1s [4096]uint16
223	prefixes  [4096]uint16
224	suffixes  [4096][Q]byte
225}
226
227func (t *sufQ) initialize() {
228	for i := 0; i < clearCode; i++ {
229		t.lengthM1s[i] = 0
230		t.prefixes[i] = noPrefix
231		t.suffixes[i][0] = byte(i)
232		for j := 1; j < Q; j++ {
233			// Unnecessary for correct output, but makes the printed table prettier.
234			t.suffixes[i][j] = '.'
235		}
236	}
237}
238
239func (t *sufQ) emit(output []byte, code int, prevCode int, n int) []byte {
240	// Fill an intermediate buffer from back to front, Q bytes at a time.
241	buffer := [4096 + Q - 1]byte{}
242	i := len(buffer)
243
244	c := code
245	if code == n {
246		c = prevCode
247		i--
248		// buffer[4095] will be set later.
249	}
250
251	i -= int(t.lengthM1s[c] % Q)
252
253	firstByte := byte(0)
254	for {
255		suffix := t.suffixes[c]
256		i -= Q
257		copy(buffer[i:i+Q], suffix[:])
258		c = int(t.prefixes[c])
259		if c == noPrefix {
260			firstByte = suffix[0]
261			break
262		}
263	}
264
265	if code == n {
266		buffer[4095] = firstByte
267	}
268
269	// The "if prevCode >= 0" guard, and initializing prevCode to -1 instead of
270	// 0, is correct conceptually, but unnecessary in practice. Look for "fake
271	// key-value pair" in std/lzw/README.md.
272	if prevCode >= 0 {
273		lm1 := t.lengthM1s[prevCode] + 1
274		t.lengthM1s[n] = lm1
275		lm1 %= Q
276		if lm1 != 0 {
277			// Copy the prevCode's prefix and suffix, and then extend the suffix.
278			t.prefixes[n] = t.prefixes[prevCode]
279			t.suffixes[n] = t.suffixes[prevCode]
280			t.suffixes[n][lm1] = firstByte
281		} else {
282			// Start a new suffix.
283			t.prefixes[n] = uint16(prevCode)
284			t.suffixes[n][0] = firstByte
285			for j := 1; j < Q; j++ {
286				// Unnecessary for correct output, but makes the printed table prettier.
287				t.suffixes[n][j] = '.'
288			}
289		}
290	}
291
292	return append(output, buffer[i:4096]...)
293}
294