1// Copyright 2015 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package main 16 17import ( 18 "bytes" 19 "compress/flate" 20 "flag" 21 "fmt" 22 "hash/crc32" 23 "io" 24 "io/ioutil" 25 "log" 26 "os" 27 "path/filepath" 28 "runtime" 29 "runtime/pprof" 30 "runtime/trace" 31 "strings" 32 "sync" 33 "time" 34 35 "android/soong/third_party/zip" 36) 37 38// Block size used during parallel compression of a single file. 39const parallelBlockSize = 1 * 1024 * 1024 // 1MB 40 41// Minimum file size to use parallel compression. It requires more 42// flate.Writer allocations, since we can't change the dictionary 43// during Reset 44const minParallelFileSize = parallelBlockSize * 6 45 46// Size of the ZIP compression window (32KB) 47const windowSize = 32 * 1024 48 49type nopCloser struct { 50 io.Writer 51} 52 53func (nopCloser) Close() error { 54 return nil 55} 56 57type fileArg struct { 58 rootPrefix, relativeRoot, file string 59} 60 61type pathMapping struct { 62 dest, src string 63} 64 65type fileArgs []fileArg 66 67func (l *fileArgs) String() string { 68 return `""` 69} 70 71func (l *fileArgs) Set(s string) error { 72 if *relativeRoot == "" { 73 return fmt.Errorf("must pass -C before -f or -l") 74 } 75 76 *l = append(*l, 77 fileArg{rootPrefix: filepath.Clean(*rootPrefix), 78 relativeRoot: filepath.Clean(*relativeRoot), 79 file: s}) 80 return nil 81} 82 83func (l *fileArgs) Get() interface{} { 84 return l 85} 86 87var ( 88 out = flag.String("o", "", "file to write zip file to") 89 manifest = flag.String("m", "", "input jar manifest file name") 90 directories = flag.Bool("d", false, "include directories in zip") 91 rootPrefix = flag.String("P", "", "path prefix within the zip at which to place files") 92 relativeRoot = flag.String("C", "", "path to use as relative root of files in next -f or -l argument") 93 parallelJobs = flag.Int("j", runtime.NumCPU(), "number of parallel threads to use") 94 compLevel = flag.Int("L", 5, "deflate compression level (0-9)") 95 96 listFiles fileArgs 97 files fileArgs 98 99 cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file") 100 traceFile = flag.String("trace", "", "write trace to file") 101) 102 103func init() { 104 flag.Var(&listFiles, "l", "file containing list of .class files") 105 flag.Var(&files, "f", "file to include in zip") 106} 107 108func usage() { 109 fmt.Fprintf(os.Stderr, "usage: soong_zip -o zipfile [-m manifest] -C dir [-f|-l file]...\n") 110 flag.PrintDefaults() 111 os.Exit(2) 112} 113 114type zipWriter struct { 115 time time.Time 116 createdDirs map[string]bool 117 directories bool 118 119 errors chan error 120 writeOps chan chan *zipEntry 121 122 rateLimit *RateLimit 123 124 compressorPool sync.Pool 125 compLevel int 126} 127 128type zipEntry struct { 129 fh *zip.FileHeader 130 131 // List of delayed io.Reader 132 futureReaders chan chan io.Reader 133} 134 135func main() { 136 flag.Parse() 137 138 if *cpuProfile != "" { 139 f, err := os.Create(*cpuProfile) 140 if err != nil { 141 fmt.Fprintln(os.Stderr, err.Error()) 142 os.Exit(1) 143 } 144 defer f.Close() 145 pprof.StartCPUProfile(f) 146 defer pprof.StopCPUProfile() 147 } 148 149 if *traceFile != "" { 150 f, err := os.Create(*traceFile) 151 if err != nil { 152 fmt.Fprintln(os.Stderr, err.Error()) 153 os.Exit(1) 154 } 155 defer f.Close() 156 err = trace.Start(f) 157 if err != nil { 158 fmt.Fprintln(os.Stderr, err.Error()) 159 os.Exit(1) 160 } 161 defer trace.Stop() 162 } 163 164 if *out == "" { 165 fmt.Fprintf(os.Stderr, "error: -o is required\n") 166 usage() 167 } 168 169 w := &zipWriter{ 170 time: time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC), 171 createdDirs: make(map[string]bool), 172 directories: *directories, 173 compLevel: *compLevel, 174 } 175 176 pathMappings := []pathMapping{} 177 set := make(map[string]string) 178 179 // load listFiles, which specify other files to include. 180 for _, l := range listFiles { 181 list, err := ioutil.ReadFile(l.file) 182 if err != nil { 183 fmt.Fprintln(os.Stderr, err.Error()) 184 os.Exit(1) 185 } 186 srcs := strings.Split(string(list), "\n") 187 for _, src := range srcs { 188 if err := fillPathPairs(l.rootPrefix, l.relativeRoot, src, 189 set, &pathMappings); err != nil { 190 log.Fatal(err) 191 } 192 } 193 } 194 195 // also include the usual files that are to be added directly. 196 for _, f := range files { 197 if err := fillPathPairs(f.rootPrefix, f.relativeRoot, 198 f.file, set, &pathMappings); err != nil { 199 log.Fatal(err) 200 } 201 } 202 203 err := w.write(*out, pathMappings, *manifest) 204 if err != nil { 205 fmt.Fprintln(os.Stderr, err.Error()) 206 os.Exit(1) 207 } 208} 209 210func fillPathPairs(prefix, rel, src string, set map[string]string, pathMappings *[]pathMapping) error { 211 src = strings.TrimSpace(src) 212 if src == "" { 213 return nil 214 } 215 src = filepath.Clean(src) 216 dest, err := filepath.Rel(rel, src) 217 if err != nil { 218 return err 219 } 220 dest = filepath.Join(prefix, dest) 221 222 if _, found := set[dest]; found { 223 return fmt.Errorf("found two file paths to be copied into dest path: %q,"+ 224 " both [%q]%q and [%q]%q!", 225 dest, dest, src, dest, set[dest]) 226 } else { 227 set[dest] = src 228 } 229 230 *pathMappings = append(*pathMappings, pathMapping{dest: dest, src: src}) 231 232 return nil 233} 234 235func (z *zipWriter) write(out string, pathMappings []pathMapping, manifest string) error { 236 f, err := os.Create(out) 237 if err != nil { 238 return err 239 } 240 241 defer f.Close() 242 defer func() { 243 if err != nil { 244 os.Remove(out) 245 } 246 }() 247 248 z.errors = make(chan error) 249 defer close(z.errors) 250 251 // This channel size can be essentially unlimited -- it's used as a fifo 252 // queue decouple the CPU and IO loads. Directories don't require any 253 // compression time, but still cost some IO. Similar with small files that 254 // can be very fast to compress. Some files that are more difficult to 255 // compress won't take a corresponding longer time writing out. 256 // 257 // The optimum size here depends on your CPU and IO characteristics, and 258 // the the layout of your zip file. 1000 was chosen mostly at random as 259 // something that worked reasonably well for a test file. 260 // 261 // The RateLimit object will put the upper bounds on the number of 262 // parallel compressions and outstanding buffers. 263 z.writeOps = make(chan chan *zipEntry, 1000) 264 z.rateLimit = NewRateLimit(*parallelJobs, 0) 265 defer z.rateLimit.Stop() 266 267 go func() { 268 var err error 269 defer close(z.writeOps) 270 271 for _, ele := range pathMappings { 272 err = z.writeFile(ele.dest, ele.src) 273 if err != nil { 274 z.errors <- err 275 return 276 } 277 } 278 279 if manifest != "" { 280 err = z.writeFile("META-INF/MANIFEST.MF", manifest) 281 if err != nil { 282 z.errors <- err 283 return 284 } 285 } 286 }() 287 288 zipw := zip.NewWriter(f) 289 290 var currentWriteOpChan chan *zipEntry 291 var currentWriter io.WriteCloser 292 var currentReaders chan chan io.Reader 293 var currentReader chan io.Reader 294 var done bool 295 296 for !done { 297 var writeOpsChan chan chan *zipEntry 298 var writeOpChan chan *zipEntry 299 var readersChan chan chan io.Reader 300 301 if currentReader != nil { 302 // Only read and process errors 303 } else if currentReaders != nil { 304 readersChan = currentReaders 305 } else if currentWriteOpChan != nil { 306 writeOpChan = currentWriteOpChan 307 } else { 308 writeOpsChan = z.writeOps 309 } 310 311 select { 312 case writeOp, ok := <-writeOpsChan: 313 if !ok { 314 done = true 315 } 316 317 currentWriteOpChan = writeOp 318 319 case op := <-writeOpChan: 320 currentWriteOpChan = nil 321 322 if op.fh.Method == zip.Deflate { 323 currentWriter, err = zipw.CreateCompressedHeader(op.fh) 324 } else { 325 var zw io.Writer 326 zw, err = zipw.CreateHeader(op.fh) 327 currentWriter = nopCloser{zw} 328 } 329 if err != nil { 330 return err 331 } 332 333 currentReaders = op.futureReaders 334 if op.futureReaders == nil { 335 currentWriter.Close() 336 currentWriter = nil 337 } 338 339 case futureReader, ok := <-readersChan: 340 if !ok { 341 // Done with reading 342 currentWriter.Close() 343 currentWriter = nil 344 currentReaders = nil 345 } 346 347 currentReader = futureReader 348 349 case reader := <-currentReader: 350 var count int64 351 count, err = io.Copy(currentWriter, reader) 352 if err != nil { 353 return err 354 } 355 z.rateLimit.Release(int(count)) 356 357 currentReader = nil 358 359 case err = <-z.errors: 360 return err 361 } 362 } 363 364 // One last chance to catch an error 365 select { 366 case err = <-z.errors: 367 return err 368 default: 369 zipw.Close() 370 return nil 371 } 372} 373 374func (z *zipWriter) writeFile(dest, src string) error { 375 var fileSize int64 376 var executable bool 377 378 if s, err := os.Lstat(src); err != nil { 379 return err 380 } else if s.IsDir() { 381 if z.directories { 382 return z.writeDirectory(dest) 383 } 384 return nil 385 } else if s.Mode()&os.ModeSymlink != 0 { 386 return z.writeSymlink(dest, src) 387 } else if !s.Mode().IsRegular() { 388 return fmt.Errorf("%s is not a file, directory, or symlink", src) 389 } else { 390 fileSize = s.Size() 391 executable = s.Mode()&0100 != 0 392 } 393 394 if z.directories { 395 dir, _ := filepath.Split(dest) 396 err := z.writeDirectory(dir) 397 if err != nil { 398 return err 399 } 400 } 401 402 compressChan := make(chan *zipEntry, 1) 403 z.writeOps <- compressChan 404 405 // Pre-fill a zipEntry, it will be sent in the compressChan once 406 // we're sure about the Method and CRC. 407 ze := &zipEntry{ 408 fh: &zip.FileHeader{ 409 Name: dest, 410 Method: zip.Deflate, 411 412 UncompressedSize64: uint64(fileSize), 413 }, 414 } 415 ze.fh.SetModTime(z.time) 416 if executable { 417 ze.fh.SetMode(0700) 418 } 419 420 r, err := os.Open(src) 421 if err != nil { 422 return err 423 } 424 425 exec := z.rateLimit.RequestExecution() 426 427 if fileSize >= minParallelFileSize { 428 wg := new(sync.WaitGroup) 429 430 // Allocate enough buffer to hold all readers. We'll limit 431 // this based on actual buffer sizes in RateLimit. 432 ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1) 433 434 // Calculate the CRC in the background, since reading the entire 435 // file could take a while. 436 // 437 // We could split this up into chuncks as well, but it's faster 438 // than the compression. Due to the Go Zip API, we also need to 439 // know the result before we can begin writing the compressed 440 // data out to the zipfile. 441 wg.Add(1) 442 go z.crcFile(r, ze, exec, compressChan, wg) 443 444 for start := int64(0); start < fileSize; start += parallelBlockSize { 445 sr := io.NewSectionReader(r, start, parallelBlockSize) 446 resultChan := make(chan io.Reader, 1) 447 ze.futureReaders <- resultChan 448 449 exec := z.rateLimit.RequestExecution() 450 451 last := !(start+parallelBlockSize < fileSize) 452 var dict []byte 453 if start >= windowSize { 454 dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize)) 455 } 456 457 wg.Add(1) 458 go z.compressPartialFile(sr, dict, last, exec, resultChan, wg) 459 } 460 461 close(ze.futureReaders) 462 463 // Close the file handle after all readers are done 464 go func(wg *sync.WaitGroup, f *os.File) { 465 wg.Wait() 466 f.Close() 467 }(wg, r) 468 } else { 469 go z.compressWholeFile(ze, r, exec, compressChan) 470 } 471 472 return nil 473} 474 475func (z *zipWriter) crcFile(r io.Reader, ze *zipEntry, exec Execution, resultChan chan *zipEntry, wg *sync.WaitGroup) { 476 defer wg.Done() 477 defer exec.Finish(0) 478 479 crc := crc32.NewIEEE() 480 _, err := io.Copy(crc, r) 481 if err != nil { 482 z.errors <- err 483 return 484 } 485 486 ze.fh.CRC32 = crc.Sum32() 487 resultChan <- ze 488 close(resultChan) 489} 490 491func (z *zipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, exec Execution, resultChan chan io.Reader, wg *sync.WaitGroup) { 492 defer wg.Done() 493 494 result, err := z.compressBlock(r, dict, last) 495 if err != nil { 496 z.errors <- err 497 return 498 } 499 500 exec.Finish(result.Len()) 501 resultChan <- result 502} 503 504func (z *zipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) { 505 buf := new(bytes.Buffer) 506 var fw *flate.Writer 507 var err error 508 if len(dict) > 0 { 509 // There's no way to Reset a Writer with a new dictionary, so 510 // don't use the Pool 511 fw, err = flate.NewWriterDict(buf, z.compLevel, dict) 512 } else { 513 var ok bool 514 if fw, ok = z.compressorPool.Get().(*flate.Writer); ok { 515 fw.Reset(buf) 516 } else { 517 fw, err = flate.NewWriter(buf, z.compLevel) 518 } 519 defer z.compressorPool.Put(fw) 520 } 521 if err != nil { 522 return nil, err 523 } 524 525 _, err = io.Copy(fw, r) 526 if err != nil { 527 return nil, err 528 } 529 if last { 530 fw.Close() 531 } else { 532 fw.Flush() 533 } 534 535 return buf, nil 536} 537 538func (z *zipWriter) compressWholeFile(ze *zipEntry, r *os.File, exec Execution, compressChan chan *zipEntry) { 539 var bufSize int 540 541 defer r.Close() 542 543 crc := crc32.NewIEEE() 544 _, err := io.Copy(crc, r) 545 if err != nil { 546 z.errors <- err 547 return 548 } 549 550 ze.fh.CRC32 = crc.Sum32() 551 552 _, err = r.Seek(0, 0) 553 if err != nil { 554 z.errors <- err 555 return 556 } 557 558 compressed, err := z.compressBlock(r, nil, true) 559 560 ze.futureReaders = make(chan chan io.Reader, 1) 561 futureReader := make(chan io.Reader, 1) 562 ze.futureReaders <- futureReader 563 close(ze.futureReaders) 564 565 if uint64(compressed.Len()) < ze.fh.UncompressedSize64 { 566 futureReader <- compressed 567 bufSize = compressed.Len() 568 } else { 569 _, err = r.Seek(0, 0) 570 if err != nil { 571 z.errors <- err 572 return 573 } 574 575 buf, err := ioutil.ReadAll(r) 576 if err != nil { 577 z.errors <- err 578 return 579 } 580 581 ze.fh.Method = zip.Store 582 futureReader <- bytes.NewReader(buf) 583 bufSize = int(ze.fh.UncompressedSize64) 584 } 585 exec.Finish(bufSize) 586 close(futureReader) 587 588 compressChan <- ze 589 close(compressChan) 590} 591 592func (z *zipWriter) writeDirectory(dir string) error { 593 if dir != "" && !strings.HasSuffix(dir, "/") { 594 dir = dir + "/" 595 } 596 597 for dir != "" && dir != "./" && !z.createdDirs[dir] { 598 z.createdDirs[dir] = true 599 600 dirHeader := &zip.FileHeader{ 601 Name: dir, 602 } 603 dirHeader.SetMode(0700 | os.ModeDir) 604 dirHeader.SetModTime(z.time) 605 606 ze := make(chan *zipEntry, 1) 607 ze <- &zipEntry{ 608 fh: dirHeader, 609 } 610 close(ze) 611 z.writeOps <- ze 612 613 dir, _ = filepath.Split(dir) 614 } 615 616 return nil 617} 618 619func (z *zipWriter) writeSymlink(rel, file string) error { 620 if z.directories { 621 dir, _ := filepath.Split(rel) 622 if err := z.writeDirectory(dir); err != nil { 623 return err 624 } 625 } 626 627 fileHeader := &zip.FileHeader{ 628 Name: rel, 629 } 630 fileHeader.SetModTime(z.time) 631 fileHeader.SetMode(0700 | os.ModeSymlink) 632 633 dest, err := os.Readlink(file) 634 if err != nil { 635 return err 636 } 637 638 ze := make(chan *zipEntry, 1) 639 futureReaders := make(chan chan io.Reader, 1) 640 futureReader := make(chan io.Reader, 1) 641 futureReaders <- futureReader 642 close(futureReaders) 643 futureReader <- bytes.NewBufferString(dest) 644 close(futureReader) 645 646 // We didn't ask permission to execute, since this should be very short 647 // but we still need to increment the outstanding buffer sizes, since 648 // the read will decrement the buffer size. 649 z.rateLimit.Release(-len(dest)) 650 651 ze <- &zipEntry{ 652 fh: fileHeader, 653 futureReaders: futureReaders, 654 } 655 close(ze) 656 z.writeOps <- ze 657 658 return nil 659} 660