1// Copyright 2015 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package main 16 17import ( 18 "bytes" 19 "compress/flate" 20 "flag" 21 "fmt" 22 "hash/crc32" 23 "io" 24 "io/ioutil" 25 "log" 26 "os" 27 "path/filepath" 28 "runtime" 29 "runtime/pprof" 30 "runtime/trace" 31 "strings" 32 "sync" 33 "time" 34 35 "android/soong/third_party/zip" 36) 37 38// Block size used during parallel compression of a single file. 39const parallelBlockSize = 1 * 1024 * 1024 // 1MB 40 41// Minimum file size to use parallel compression. It requires more 42// flate.Writer allocations, since we can't change the dictionary 43// during Reset 44const minParallelFileSize = parallelBlockSize * 6 45 46// Size of the ZIP compression window (32KB) 47const windowSize = 32 * 1024 48 49type nopCloser struct { 50 io.Writer 51} 52 53func (nopCloser) Close() error { 54 return nil 55} 56 57type fileArg struct { 58 pathPrefixInZip, sourcePrefixToStrip string 59 sourceFiles []string 60} 61 62type pathMapping struct { 63 dest, src string 64 zipMethod uint16 65} 66 67type uniqueSet map[string]bool 68 69func (u *uniqueSet) String() string { 70 return `""` 71} 72 73func (u *uniqueSet) Set(s string) error { 74 if _, found := (*u)[s]; found { 75 return fmt.Errorf("File %q was specified twice as a file to not deflate", s) 76 } else { 77 (*u)[s] = true 78 } 79 80 return nil 81} 82 83type fileArgs []fileArg 84 85type file struct{} 86 87type listFiles struct{} 88 89func (f *file) String() string { 90 return `""` 91} 92 93func (f *file) Set(s string) error { 94 if *relativeRoot == "" { 95 return fmt.Errorf("must pass -C before -f or -l") 96 } 97 98 fArgs = append(fArgs, fileArg{ 99 pathPrefixInZip: filepath.Clean(*rootPrefix), 100 sourcePrefixToStrip: filepath.Clean(*relativeRoot), 101 sourceFiles: []string{s}, 102 }) 103 104 return nil 105} 106 107func (l *listFiles) String() string { 108 return `""` 109} 110 111func (l *listFiles) Set(s string) error { 112 if *relativeRoot == "" { 113 return fmt.Errorf("must pass -C before -f or -l") 114 } 115 116 list, err := ioutil.ReadFile(s) 117 if err != nil { 118 return err 119 } 120 121 fArgs = append(fArgs, fileArg{ 122 pathPrefixInZip: filepath.Clean(*rootPrefix), 123 sourcePrefixToStrip: filepath.Clean(*relativeRoot), 124 sourceFiles: strings.Split(string(list), "\n"), 125 }) 126 127 return nil 128} 129 130var ( 131 out = flag.String("o", "", "file to write zip file to") 132 manifest = flag.String("m", "", "input jar manifest file name") 133 directories = flag.Bool("d", false, "include directories in zip") 134 rootPrefix = flag.String("P", "", "path prefix within the zip at which to place files") 135 relativeRoot = flag.String("C", "", "path to use as relative root of files in next -f or -l argument") 136 parallelJobs = flag.Int("j", runtime.NumCPU(), "number of parallel threads to use") 137 compLevel = flag.Int("L", 5, "deflate compression level (0-9)") 138 139 fArgs fileArgs 140 nonDeflatedFiles = make(uniqueSet) 141 142 cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file") 143 traceFile = flag.String("trace", "", "write trace to file") 144) 145 146func init() { 147 flag.Var(&listFiles{}, "l", "file containing list of .class files") 148 flag.Var(&file{}, "f", "file to include in zip") 149 flag.Var(&nonDeflatedFiles, "s", "file path to be stored within the zip without compression") 150} 151 152func usage() { 153 fmt.Fprintf(os.Stderr, "usage: soong_zip -o zipfile [-m manifest] -C dir [-f|-l file]...\n") 154 flag.PrintDefaults() 155 os.Exit(2) 156} 157 158type zipWriter struct { 159 time time.Time 160 createdDirs map[string]bool 161 directories bool 162 163 errors chan error 164 writeOps chan chan *zipEntry 165 166 rateLimit *RateLimit 167 168 compressorPool sync.Pool 169 compLevel int 170} 171 172type zipEntry struct { 173 fh *zip.FileHeader 174 175 // List of delayed io.Reader 176 futureReaders chan chan io.Reader 177} 178 179func main() { 180 flag.Parse() 181 182 if *cpuProfile != "" { 183 f, err := os.Create(*cpuProfile) 184 if err != nil { 185 fmt.Fprintln(os.Stderr, err.Error()) 186 os.Exit(1) 187 } 188 defer f.Close() 189 pprof.StartCPUProfile(f) 190 defer pprof.StopCPUProfile() 191 } 192 193 if *traceFile != "" { 194 f, err := os.Create(*traceFile) 195 if err != nil { 196 fmt.Fprintln(os.Stderr, err.Error()) 197 os.Exit(1) 198 } 199 defer f.Close() 200 err = trace.Start(f) 201 if err != nil { 202 fmt.Fprintln(os.Stderr, err.Error()) 203 os.Exit(1) 204 } 205 defer trace.Stop() 206 } 207 208 if *out == "" { 209 fmt.Fprintf(os.Stderr, "error: -o is required\n") 210 usage() 211 } 212 213 w := &zipWriter{ 214 time: time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC), 215 createdDirs: make(map[string]bool), 216 directories: *directories, 217 compLevel: *compLevel, 218 } 219 220 pathMappings := []pathMapping{} 221 set := make(map[string]string) 222 223 for _, fa := range fArgs { 224 for _, src := range fa.sourceFiles { 225 if err := fillPathPairs(fa.pathPrefixInZip, 226 fa.sourcePrefixToStrip, src, set, &pathMappings); err != nil { 227 log.Fatal(err) 228 } 229 } 230 } 231 232 err := w.write(*out, pathMappings, *manifest) 233 if err != nil { 234 fmt.Fprintln(os.Stderr, err.Error()) 235 os.Exit(1) 236 } 237} 238 239func fillPathPairs(prefix, rel, src string, set map[string]string, pathMappings *[]pathMapping) error { 240 src = strings.TrimSpace(src) 241 if src == "" { 242 return nil 243 } 244 src = filepath.Clean(src) 245 dest, err := filepath.Rel(rel, src) 246 if err != nil { 247 return err 248 } 249 dest = filepath.Join(prefix, dest) 250 251 if _, found := set[dest]; found { 252 return fmt.Errorf("found two file paths to be copied into dest path: %q,"+ 253 " both [%q]%q and [%q]%q!", 254 dest, dest, src, dest, set[dest]) 255 } else { 256 set[dest] = src 257 } 258 259 zipMethod := zip.Deflate 260 if _, found := nonDeflatedFiles[dest]; found { 261 zipMethod = zip.Store 262 } 263 *pathMappings = append(*pathMappings, 264 pathMapping{dest: dest, src: src, zipMethod: zipMethod}) 265 266 return nil 267} 268 269func (z *zipWriter) write(out string, pathMappings []pathMapping, manifest string) error { 270 f, err := os.Create(out) 271 if err != nil { 272 return err 273 } 274 275 defer f.Close() 276 defer func() { 277 if err != nil { 278 os.Remove(out) 279 } 280 }() 281 282 z.errors = make(chan error) 283 defer close(z.errors) 284 285 // This channel size can be essentially unlimited -- it's used as a fifo 286 // queue decouple the CPU and IO loads. Directories don't require any 287 // compression time, but still cost some IO. Similar with small files that 288 // can be very fast to compress. Some files that are more difficult to 289 // compress won't take a corresponding longer time writing out. 290 // 291 // The optimum size here depends on your CPU and IO characteristics, and 292 // the the layout of your zip file. 1000 was chosen mostly at random as 293 // something that worked reasonably well for a test file. 294 // 295 // The RateLimit object will put the upper bounds on the number of 296 // parallel compressions and outstanding buffers. 297 z.writeOps = make(chan chan *zipEntry, 1000) 298 z.rateLimit = NewRateLimit(*parallelJobs, 0) 299 defer z.rateLimit.Stop() 300 301 go func() { 302 var err error 303 defer close(z.writeOps) 304 305 for _, ele := range pathMappings { 306 err = z.writeFile(ele.dest, ele.src, ele.zipMethod) 307 if err != nil { 308 z.errors <- err 309 return 310 } 311 } 312 313 if manifest != "" { 314 err = z.writeFile("META-INF/MANIFEST.MF", manifest, zip.Deflate) 315 if err != nil { 316 z.errors <- err 317 return 318 } 319 } 320 }() 321 322 zipw := zip.NewWriter(f) 323 324 var currentWriteOpChan chan *zipEntry 325 var currentWriter io.WriteCloser 326 var currentReaders chan chan io.Reader 327 var currentReader chan io.Reader 328 var done bool 329 330 for !done { 331 var writeOpsChan chan chan *zipEntry 332 var writeOpChan chan *zipEntry 333 var readersChan chan chan io.Reader 334 335 if currentReader != nil { 336 // Only read and process errors 337 } else if currentReaders != nil { 338 readersChan = currentReaders 339 } else if currentWriteOpChan != nil { 340 writeOpChan = currentWriteOpChan 341 } else { 342 writeOpsChan = z.writeOps 343 } 344 345 select { 346 case writeOp, ok := <-writeOpsChan: 347 if !ok { 348 done = true 349 } 350 351 currentWriteOpChan = writeOp 352 353 case op := <-writeOpChan: 354 currentWriteOpChan = nil 355 356 if op.fh.Method == zip.Deflate { 357 currentWriter, err = zipw.CreateCompressedHeader(op.fh) 358 } else { 359 var zw io.Writer 360 zw, err = zipw.CreateHeader(op.fh) 361 currentWriter = nopCloser{zw} 362 } 363 if err != nil { 364 return err 365 } 366 367 currentReaders = op.futureReaders 368 if op.futureReaders == nil { 369 currentWriter.Close() 370 currentWriter = nil 371 } 372 373 case futureReader, ok := <-readersChan: 374 if !ok { 375 // Done with reading 376 currentWriter.Close() 377 currentWriter = nil 378 currentReaders = nil 379 } 380 381 currentReader = futureReader 382 383 case reader := <-currentReader: 384 var count int64 385 count, err = io.Copy(currentWriter, reader) 386 if err != nil { 387 return err 388 } 389 z.rateLimit.Release(int(count)) 390 391 currentReader = nil 392 393 case err = <-z.errors: 394 return err 395 } 396 } 397 398 // One last chance to catch an error 399 select { 400 case err = <-z.errors: 401 return err 402 default: 403 zipw.Close() 404 return nil 405 } 406} 407 408func (z *zipWriter) writeFile(dest, src string, method uint16) error { 409 var fileSize int64 410 var executable bool 411 412 if s, err := os.Lstat(src); err != nil { 413 return err 414 } else if s.IsDir() { 415 if z.directories { 416 return z.writeDirectory(dest) 417 } 418 return nil 419 } else if s.Mode()&os.ModeSymlink != 0 { 420 return z.writeSymlink(dest, src) 421 } else if !s.Mode().IsRegular() { 422 return fmt.Errorf("%s is not a file, directory, or symlink", src) 423 } else { 424 fileSize = s.Size() 425 executable = s.Mode()&0100 != 0 426 } 427 428 if z.directories { 429 dir, _ := filepath.Split(dest) 430 err := z.writeDirectory(dir) 431 if err != nil { 432 return err 433 } 434 } 435 436 compressChan := make(chan *zipEntry, 1) 437 z.writeOps <- compressChan 438 439 // Pre-fill a zipEntry, it will be sent in the compressChan once 440 // we're sure about the Method and CRC. 441 ze := &zipEntry{ 442 fh: &zip.FileHeader{ 443 Name: dest, 444 Method: method, 445 446 UncompressedSize64: uint64(fileSize), 447 }, 448 } 449 ze.fh.SetModTime(z.time) 450 if executable { 451 ze.fh.SetMode(0700) 452 } 453 454 r, err := os.Open(src) 455 if err != nil { 456 return err 457 } 458 459 exec := z.rateLimit.RequestExecution() 460 461 if method == zip.Deflate && fileSize >= minParallelFileSize { 462 wg := new(sync.WaitGroup) 463 464 // Allocate enough buffer to hold all readers. We'll limit 465 // this based on actual buffer sizes in RateLimit. 466 ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1) 467 468 // Calculate the CRC in the background, since reading the entire 469 // file could take a while. 470 // 471 // We could split this up into chuncks as well, but it's faster 472 // than the compression. Due to the Go Zip API, we also need to 473 // know the result before we can begin writing the compressed 474 // data out to the zipfile. 475 wg.Add(1) 476 go z.crcFile(r, ze, exec, compressChan, wg) 477 478 for start := int64(0); start < fileSize; start += parallelBlockSize { 479 sr := io.NewSectionReader(r, start, parallelBlockSize) 480 resultChan := make(chan io.Reader, 1) 481 ze.futureReaders <- resultChan 482 483 exec := z.rateLimit.RequestExecution() 484 485 last := !(start+parallelBlockSize < fileSize) 486 var dict []byte 487 if start >= windowSize { 488 dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize)) 489 } 490 491 wg.Add(1) 492 go z.compressPartialFile(sr, dict, last, exec, resultChan, wg) 493 } 494 495 close(ze.futureReaders) 496 497 // Close the file handle after all readers are done 498 go func(wg *sync.WaitGroup, f *os.File) { 499 wg.Wait() 500 f.Close() 501 }(wg, r) 502 } else { 503 go z.compressWholeFile(ze, r, exec, compressChan) 504 } 505 506 return nil 507} 508 509func (z *zipWriter) crcFile(r io.Reader, ze *zipEntry, exec Execution, resultChan chan *zipEntry, wg *sync.WaitGroup) { 510 defer wg.Done() 511 defer exec.Finish(0) 512 513 crc := crc32.NewIEEE() 514 _, err := io.Copy(crc, r) 515 if err != nil { 516 z.errors <- err 517 return 518 } 519 520 ze.fh.CRC32 = crc.Sum32() 521 resultChan <- ze 522 close(resultChan) 523} 524 525func (z *zipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, exec Execution, resultChan chan io.Reader, wg *sync.WaitGroup) { 526 defer wg.Done() 527 528 result, err := z.compressBlock(r, dict, last) 529 if err != nil { 530 z.errors <- err 531 return 532 } 533 534 exec.Finish(result.Len()) 535 resultChan <- result 536} 537 538func (z *zipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) { 539 buf := new(bytes.Buffer) 540 var fw *flate.Writer 541 var err error 542 if len(dict) > 0 { 543 // There's no way to Reset a Writer with a new dictionary, so 544 // don't use the Pool 545 fw, err = flate.NewWriterDict(buf, z.compLevel, dict) 546 } else { 547 var ok bool 548 if fw, ok = z.compressorPool.Get().(*flate.Writer); ok { 549 fw.Reset(buf) 550 } else { 551 fw, err = flate.NewWriter(buf, z.compLevel) 552 } 553 defer z.compressorPool.Put(fw) 554 } 555 if err != nil { 556 return nil, err 557 } 558 559 _, err = io.Copy(fw, r) 560 if err != nil { 561 return nil, err 562 } 563 if last { 564 fw.Close() 565 } else { 566 fw.Flush() 567 } 568 569 return buf, nil 570} 571 572func (z *zipWriter) compressWholeFile(ze *zipEntry, r *os.File, exec Execution, compressChan chan *zipEntry) { 573 var bufSize int 574 575 defer r.Close() 576 577 crc := crc32.NewIEEE() 578 _, err := io.Copy(crc, r) 579 if err != nil { 580 z.errors <- err 581 return 582 } 583 584 ze.fh.CRC32 = crc.Sum32() 585 586 _, err = r.Seek(0, 0) 587 if err != nil { 588 z.errors <- err 589 return 590 } 591 592 readFile := func(r *os.File) ([]byte, error) { 593 _, err = r.Seek(0, 0) 594 if err != nil { 595 return nil, err 596 } 597 598 buf, err := ioutil.ReadAll(r) 599 if err != nil { 600 return nil, err 601 } 602 603 return buf, nil 604 } 605 606 ze.futureReaders = make(chan chan io.Reader, 1) 607 futureReader := make(chan io.Reader, 1) 608 ze.futureReaders <- futureReader 609 close(ze.futureReaders) 610 611 if ze.fh.Method == zip.Deflate { 612 compressed, err := z.compressBlock(r, nil, true) 613 if err != nil { 614 z.errors <- err 615 return 616 } 617 if uint64(compressed.Len()) < ze.fh.UncompressedSize64 { 618 futureReader <- compressed 619 bufSize = compressed.Len() 620 } else { 621 buf, err := readFile(r) 622 if err != nil { 623 z.errors <- err 624 return 625 } 626 ze.fh.Method = zip.Store 627 futureReader <- bytes.NewReader(buf) 628 bufSize = int(ze.fh.UncompressedSize64) 629 } 630 } else { 631 buf, err := readFile(r) 632 if err != nil { 633 z.errors <- err 634 return 635 } 636 ze.fh.Method = zip.Store 637 futureReader <- bytes.NewReader(buf) 638 bufSize = int(ze.fh.UncompressedSize64) 639 } 640 641 exec.Finish(bufSize) 642 close(futureReader) 643 644 compressChan <- ze 645 close(compressChan) 646} 647 648func (z *zipWriter) writeDirectory(dir string) error { 649 if dir != "" && !strings.HasSuffix(dir, "/") { 650 dir = dir + "/" 651 } 652 653 for dir != "" && dir != "./" && !z.createdDirs[dir] { 654 z.createdDirs[dir] = true 655 656 dirHeader := &zip.FileHeader{ 657 Name: dir, 658 } 659 dirHeader.SetMode(0700 | os.ModeDir) 660 dirHeader.SetModTime(z.time) 661 662 ze := make(chan *zipEntry, 1) 663 ze <- &zipEntry{ 664 fh: dirHeader, 665 } 666 close(ze) 667 z.writeOps <- ze 668 669 dir, _ = filepath.Split(dir) 670 } 671 672 return nil 673} 674 675func (z *zipWriter) writeSymlink(rel, file string) error { 676 if z.directories { 677 dir, _ := filepath.Split(rel) 678 if err := z.writeDirectory(dir); err != nil { 679 return err 680 } 681 } 682 683 fileHeader := &zip.FileHeader{ 684 Name: rel, 685 } 686 fileHeader.SetModTime(z.time) 687 fileHeader.SetMode(0700 | os.ModeSymlink) 688 689 dest, err := os.Readlink(file) 690 if err != nil { 691 return err 692 } 693 694 ze := make(chan *zipEntry, 1) 695 futureReaders := make(chan chan io.Reader, 1) 696 futureReader := make(chan io.Reader, 1) 697 futureReaders <- futureReader 698 close(futureReaders) 699 futureReader <- bytes.NewBufferString(dest) 700 close(futureReader) 701 702 // We didn't ask permission to execute, since this should be very short 703 // but we still need to increment the outstanding buffer sizes, since 704 // the read will decrement the buffer size. 705 z.rateLimit.Release(-len(dest)) 706 707 ze <- &zipEntry{ 708 fh: fileHeader, 709 futureReaders: futureReaders, 710 } 711 close(ze) 712 z.writeOps <- ze 713 714 return nil 715} 716