1// Copyright 2017 The Wuffs Authors. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// +build ignore 16 17package main 18 19// extract-deflate-offsets.go extracts the start and end offsets of the 20// deflate-compressed data wrapped in a .gz file. 21// 22// Usage: go run extract-deflate-offsets.go foo.gz bar.gz 23// 24// Alternatively: go run extract-deflate-offsets.go -write-deflate foo.gz 25// 26// Alternatively: go run extract-deflate-offsets.go -write-zlib foo.gz 27 28import ( 29 "bytes" 30 "compress/flate" 31 "compress/zlib" 32 "crypto/md5" 33 "flag" 34 "fmt" 35 "hash/adler32" 36 "io/ioutil" 37 "os" 38 "strings" 39) 40 41var ( 42 writeDeflate = flag.Bool("write-deflate", false, "whether to convert gzip to raw deflate") 43 writeZlib = flag.Bool("write-zlib", false, "whether to convert gzip to zlib") 44) 45 46// GZIP wraps a header and footer around deflate data. The format is described in 47// RFC 1952: https://www.ietf.org/rfc/rfc1952.txt 48const ( 49 flagText = 1 << 0 50 flagHCRC = 1 << 1 51 flagExtra = 1 << 2 52 flagName = 1 << 3 53 flagComment = 1 << 4 54) 55 56func main() { 57 if err := main1(); err != nil { 58 os.Stderr.WriteString(err.Error() + "\n") 59 os.Exit(1) 60 } 61} 62 63func main1() error { 64 flag.Parse() 65 for _, a := range flag.Args() { 66 if err := decode(a); err != nil { 67 return err 68 } 69 } 70 return nil 71} 72 73func decode(filename string) error { 74 src, err := ioutil.ReadFile(filename) 75 if err != nil { 76 return err 77 } 78 79 const ( 80 headerSize = 10 81 footerSize = 8 82 ) 83 if len(src) < headerSize+footerSize || src[0] != 0x1F || src[1] != 0x8B || src[2] != 0x08 { 84 return fmt.Errorf("not a GZIP") 85 } 86 if len(src) >= 0x10000000 { 87 return fmt.Errorf("file too large") 88 } 89 flags := src[3] 90 i := headerSize 91 src = src[:len(src)-footerSize] 92 93 if flags&flagExtra != 0 { 94 return fmt.Errorf("TODO: support gzip extra flag") 95 } 96 97 if flags&flagName != 0 { 98 if i, err = readString(src, i); err != nil { 99 return err 100 } 101 } 102 103 if flags&flagComment != 0 { 104 if i, err = readString(src, i); err != nil { 105 return err 106 } 107 } 108 109 if flags&flagHCRC != 0 { 110 return fmt.Errorf("TODO: support gzip HCRC flag") 111 } 112 113 // As a sanity check, the result should be valid deflate. 114 uncompressed, err := checkDeflate(src[i:]) 115 if err != nil { 116 return err 117 } 118 119 if *writeDeflate { 120 return doWriteDeflate(src[i:], uncompressed, filename) 121 } else if *writeZlib { 122 return doWriteZlib(src[i:], uncompressed, filename) 123 } 124 fmt.Printf("%7d %7d %x %s\n", i, len(src), md5.Sum(uncompressed), filename) 125 return nil 126} 127 128func doWriteDeflate(deflateCompressed []byte, uncompressed []byte, filename string) error { 129 if strings.HasSuffix(filename, ".gz") { 130 filename = filename[:len(filename)-3] 131 } 132 filename += ".deflate" 133 if err := ioutil.WriteFile(filename, deflateCompressed, 0666); err != nil { 134 return err 135 } 136 fmt.Printf("wrote %s\n", filename) 137 return nil 138} 139 140func doWriteZlib(deflateCompressed []byte, uncompressed []byte, filename string) error { 141 buf := bytes.NewBuffer(nil) 142 // The ZLIB header (as per https://www.ietf.org/rfc/rfc1950.txt) is 2 143 // bytes. 144 // 145 // The first byte's low 4 bits is the compression method: 8 means deflate. 146 // The first byte's high 4 bits is the compression info: 7 means a 32KiB 147 // deflate window size. 148 // 149 // The second byte's low 5 bits are a parity check. The 5th bit (0 in this 150 // case) indicates a preset dictionary. The high 2 bits (2 in this case) 151 // means the default compression algorithm. 152 buf.WriteString("\x78\x9c") 153 // Write the payload. 154 buf.Write(deflateCompressed) 155 // The ZLIB footer is 4 bytes: a big-endian checksum. 156 checksum := adler32.Checksum(uncompressed) 157 buf.WriteByte(uint8(checksum >> 24)) 158 buf.WriteByte(uint8(checksum >> 16)) 159 buf.WriteByte(uint8(checksum >> 8)) 160 buf.WriteByte(uint8(checksum >> 0)) 161 162 asZlib := buf.Bytes() 163 164 // As a sanity check, the result should be valid zlib. 165 if _, err := checkZlib(asZlib); err != nil { 166 return err 167 } 168 169 if strings.HasSuffix(filename, ".gz") { 170 filename = filename[:len(filename)-3] 171 } 172 filename += ".zlib" 173 if err := ioutil.WriteFile(filename, asZlib, 0666); err != nil { 174 return err 175 } 176 fmt.Printf("wrote %s\n", filename) 177 return nil 178} 179 180func readString(src []byte, i int) (int, error) { 181 for { 182 if i >= len(src) { 183 return 0, fmt.Errorf("bad GZIP string") 184 } 185 if src[i] == 0 { 186 return i + 1, nil 187 } 188 i++ 189 } 190} 191 192func checkDeflate(x []byte) ([]byte, error) { 193 rc := flate.NewReader(bytes.NewReader(x)) 194 defer rc.Close() 195 x, err := ioutil.ReadAll(rc) 196 if err != nil { 197 return nil, fmt.Errorf("data is not valid deflate: %v", err) 198 } 199 return x, nil 200} 201 202func checkZlib(x []byte) ([]byte, error) { 203 rc, err := zlib.NewReader(bytes.NewReader(x)) 204 if err != nil { 205 return nil, fmt.Errorf("data is not valid zlib: %v", err) 206 } 207 defer rc.Close() 208 x, err = ioutil.ReadAll(rc) 209 if err != nil { 210 return nil, fmt.Errorf("data is not valid zlib: %v", err) 211 } 212 return x, nil 213} 214