• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2017 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// +build ignore
16
17package main
18
19// extract-deflate-offsets.go extracts the start and end offsets of the
20// deflate-compressed data wrapped in a .gz file.
21//
22// Usage: go run extract-deflate-offsets.go foo.gz bar.gz
23//
24// Alternatively: go run extract-deflate-offsets.go -write-deflate foo.gz
25//
26// Alternatively: go run extract-deflate-offsets.go -write-zlib foo.gz
27
28import (
29	"bytes"
30	"compress/flate"
31	"compress/zlib"
32	"crypto/md5"
33	"flag"
34	"fmt"
35	"hash/adler32"
36	"io/ioutil"
37	"os"
38	"strings"
39)
40
// writeDeflate is the -write-deflate command-line flag. It defaults to false.
var writeDeflate = flag.Bool("write-deflate", false, "whether to convert gzip to raw deflate")

// writeZlib is the -write-zlib command-line flag. It defaults to false.
var writeZlib = flag.Bool("write-zlib", false, "whether to convert gzip to zlib")
45
// GZIP wraps a header and footer around deflate data. The format is described
// in RFC 1952: https://www.ietf.org/rfc/rfc1952.txt
//
// These are the bit masks for the gzip header's flags byte, in ascending bit
// order starting from bit 0.
const (
	flagText    = 1 << iota // bit 0 (FTEXT in RFC 1952)
	flagHCRC                // bit 1 (FHCRC): a header checksum is present
	flagExtra               // bit 2 (FEXTRA): an extra field is present
	flagName                // bit 3 (FNAME): a NUL-terminated name is present
	flagComment             // bit 4 (FCOMMENT): a NUL-terminated comment is present
)
55
56func main() {
57	if err := main1(); err != nil {
58		os.Stderr.WriteString(err.Error() + "\n")
59		os.Exit(1)
60	}
61}
62
63func main1() error {
64	flag.Parse()
65	for _, a := range flag.Args() {
66		if err := decode(a); err != nil {
67			return err
68		}
69	}
70	return nil
71}
72
73func decode(filename string) error {
74	src, err := ioutil.ReadFile(filename)
75	if err != nil {
76		return err
77	}
78
79	const (
80		headerSize = 10
81		footerSize = 8
82	)
83	if len(src) < headerSize+footerSize || src[0] != 0x1F || src[1] != 0x8B || src[2] != 0x08 {
84		return fmt.Errorf("not a GZIP")
85	}
86	if len(src) >= 0x10000000 {
87		return fmt.Errorf("file too large")
88	}
89	flags := src[3]
90	i := headerSize
91	src = src[:len(src)-footerSize]
92
93	if flags&flagExtra != 0 {
94		return fmt.Errorf("TODO: support gzip extra flag")
95	}
96
97	if flags&flagName != 0 {
98		if i, err = readString(src, i); err != nil {
99			return err
100		}
101	}
102
103	if flags&flagComment != 0 {
104		if i, err = readString(src, i); err != nil {
105			return err
106		}
107	}
108
109	if flags&flagHCRC != 0 {
110		return fmt.Errorf("TODO: support gzip HCRC flag")
111	}
112
113	// As a sanity check, the result should be valid deflate.
114	uncompressed, err := checkDeflate(src[i:])
115	if err != nil {
116		return err
117	}
118
119	if *writeDeflate {
120		return doWriteDeflate(src[i:], uncompressed, filename)
121	} else if *writeZlib {
122		return doWriteZlib(src[i:], uncompressed, filename)
123	}
124	fmt.Printf("%7d %7d %x  %s\n", i, len(src), md5.Sum(uncompressed), filename)
125	return nil
126}
127
// doWriteDeflate writes deflateCompressed, unchanged, to a file named after
// filename with any trailing ".gz" replaced by ".deflate" (or with ".deflate"
// appended if there is no ".gz" suffix), and reports the output name on
// standard output.
//
// The uncompressed argument is not used; it is accepted so that the signature
// matches doWriteZlib.
func doWriteDeflate(deflateCompressed []byte, uncompressed []byte, filename string) error {
	outName := strings.TrimSuffix(filename, ".gz") + ".deflate"
	err := ioutil.WriteFile(outName, deflateCompressed, 0666)
	if err != nil {
		return err
	}
	fmt.Printf("wrote %s\n", outName)
	return nil
}
139
140func doWriteZlib(deflateCompressed []byte, uncompressed []byte, filename string) error {
141	buf := bytes.NewBuffer(nil)
142	// The ZLIB header (as per https://www.ietf.org/rfc/rfc1950.txt) is 2
143	// bytes.
144	//
145	// The first byte's low 4 bits is the compression method: 8 means deflate.
146	// The first byte's high 4 bits is the compression info: 7 means a 32KiB
147	// deflate window size.
148	//
149	// The second byte's low 5 bits are a parity check. The 5th bit (0 in this
150	// case) indicates a preset dictionary. The high 2 bits (2 in this case)
151	// means the default compression algorithm.
152	buf.WriteString("\x78\x9c")
153	// Write the payload.
154	buf.Write(deflateCompressed)
155	// The ZLIB footer is 4 bytes: a big-endian checksum.
156	checksum := adler32.Checksum(uncompressed)
157	buf.WriteByte(uint8(checksum >> 24))
158	buf.WriteByte(uint8(checksum >> 16))
159	buf.WriteByte(uint8(checksum >> 8))
160	buf.WriteByte(uint8(checksum >> 0))
161
162	asZlib := buf.Bytes()
163
164	// As a sanity check, the result should be valid zlib.
165	if _, err := checkZlib(asZlib); err != nil {
166		return err
167	}
168
169	if strings.HasSuffix(filename, ".gz") {
170		filename = filename[:len(filename)-3]
171	}
172	filename += ".zlib"
173	if err := ioutil.WriteFile(filename, asZlib, 0666); err != nil {
174		return err
175	}
176	fmt.Printf("wrote %s\n", filename)
177	return nil
178}
179
// readString scans src from index i for a NUL byte and returns the index just
// past it, i.e. the index of whatever follows the NUL-terminated string that
// starts at i.
//
// If there is no NUL byte at or after i (including when i is at or past the
// end of src), it returns 0 and an error.
func readString(src []byte, i int) (int, error) {
	for ; i < len(src); i++ {
		if src[i] == 0 {
			return i + 1, nil
		}
	}
	return 0, fmt.Errorf("bad GZIP string")
}
191
// checkDeflate decompresses x as a single raw deflate stream and returns the
// uncompressed bytes.
//
// It returns an error if x does not decode as deflate, or if any bytes of x
// are left over after the final deflate block. Left-over bytes mean that x is
// not exactly one deflate stream (for example, it came from a multi-member
// gzip file or has trailing garbage), so offsets derived from len(x) would be
// wrong.
func checkDeflate(x []byte) ([]byte, error) {
	// Keep a handle on the underlying reader so that the number of unread
	// bytes can be inspected afterwards. *bytes.Reader implements
	// io.ByteReader, in which case the flate reader is documented not to read
	// more data than necessary.
	r := bytes.NewReader(x)
	rc := flate.NewReader(r)
	defer rc.Close()
	uncompressed, err := ioutil.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("data is not valid deflate: %v", err)
	}
	if n := r.Len(); n != 0 {
		return nil, fmt.Errorf("data is not valid deflate: %d trailing bytes after the end of the stream", n)
	}
	return uncompressed, nil
}
201
// checkZlib decompresses x as zlib-wrapped data and returns the uncompressed
// bytes. Any failure, whether in creating the zlib reader or in reading from
// it, is reported as a "data is not valid zlib" error.
func checkZlib(x []byte) ([]byte, error) {
	// Both failure points are reported with the same wrapping.
	invalid := func(cause error) ([]byte, error) {
		return nil, fmt.Errorf("data is not valid zlib: %v", cause)
	}

	zr, err := zlib.NewReader(bytes.NewReader(x))
	if err != nil {
		return invalid(err)
	}
	defer zr.Close()

	decoded, err := ioutil.ReadAll(zr)
	if err != nil {
		return invalid(err)
	}
	return decoded, nil
}
214