• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package main
16
17import (
18	"bytes"
19	"compress/flate"
20	"flag"
21	"fmt"
22	"hash/crc32"
23	"io"
24	"io/ioutil"
25	"log"
26	"os"
27	"path/filepath"
28	"runtime"
29	"runtime/pprof"
30	"runtime/trace"
31	"strings"
32	"sync"
33	"time"
34
35	"android/soong/third_party/zip"
36)
37
// Tunables for parallel deflate of a single large file.
const (
	// parallelBlockSize is the chunk size used when one file is
	// compressed across multiple goroutines.
	parallelBlockSize = 1 * 1024 * 1024 // 1MB

	// minParallelFileSize is the smallest file eligible for parallel
	// compression. It requires more flate.Writer allocations, since we
	// can't change the dictionary during Reset.
	minParallelFileSize = parallelBlockSize * 6

	// windowSize is the size of the ZIP compression window (32KB).
	windowSize = 32 * 1024
)
48
// nopCloser adapts an io.Writer into an io.WriteCloser whose Close is a
// no-op. It is used for stored (uncompressed) zip entries, which need no
// finalization step.
type nopCloser struct {
	io.Writer
}

// Close implements io.Closer; it always succeeds.
func (nopCloser) Close() error {
	return nil
}
56
// fileArg is one -C/-f/-l group from the command line: a set of source
// files, the source path prefix to strip from them, and the prefix to
// prepend to their paths inside the zip.
type fileArg struct {
	pathPrefixInZip, sourcePrefixToStrip string
	sourceFiles                          []string
}

// pathMapping maps a single source file to its destination path inside
// the zip, along with the compression method (deflate or store) to use.
type pathMapping struct {
	dest, src string
	zipMethod uint16
}
66
// uniqueSet is a string set used as a flag.Value for -s; each path may be
// specified at most once.
type uniqueSet map[string]bool

// String implements flag.Value. The set has no meaningful single-string
// representation, so a placeholder is returned for -help output.
func (u *uniqueSet) String() string {
	return `""`
}

// Set implements flag.Value, adding s to the set. Specifying the same
// path twice is an error.
func (u *uniqueSet) Set(s string) error {
	// Idiom fix: no else needed after a terminating return.
	if _, found := (*u)[s]; found {
		return fmt.Errorf("File %q was specified twice as a file to not deflate", s)
	}
	(*u)[s] = true

	return nil
}
82
// fileArgs collects every -f/-l group parsed from the command line.
type fileArgs []fileArg

// file is the flag.Value behind -f (add a single file).
type file struct{}

// listFiles is the flag.Value behind -l (add files listed in a file).
type listFiles struct{}
88
89func (f *file) String() string {
90	return `""`
91}
92
93func (f *file) Set(s string) error {
94	if *relativeRoot == "" {
95		return fmt.Errorf("must pass -C before -f or -l")
96	}
97
98	fArgs = append(fArgs, fileArg{
99		pathPrefixInZip:     filepath.Clean(*rootPrefix),
100		sourcePrefixToStrip: filepath.Clean(*relativeRoot),
101		sourceFiles:         []string{s},
102	})
103
104	return nil
105}
106
107func (l *listFiles) String() string {
108	return `""`
109}
110
111func (l *listFiles) Set(s string) error {
112	if *relativeRoot == "" {
113		return fmt.Errorf("must pass -C before -f or -l")
114	}
115
116	list, err := ioutil.ReadFile(s)
117	if err != nil {
118		return err
119	}
120
121	fArgs = append(fArgs, fileArg{
122		pathPrefixInZip:     filepath.Clean(*rootPrefix),
123		sourcePrefixToStrip: filepath.Clean(*relativeRoot),
124		sourceFiles:         strings.Split(string(list), "\n"),
125	})
126
127	return nil
128}
129
// Command-line flags and the global state they populate.
var (
	out          = flag.String("o", "", "file to write zip file to")
	manifest     = flag.String("m", "", "input jar manifest file name")
	directories  = flag.Bool("d", false, "include directories in zip")
	rootPrefix   = flag.String("P", "", "path prefix within the zip at which to place files")
	relativeRoot = flag.String("C", "", "path to use as relative root of files in next -f or -l argument")
	parallelJobs = flag.Int("j", runtime.NumCPU(), "number of parallel threads to use")
	compLevel    = flag.Int("L", 5, "deflate compression level (0-9)")

	// fArgs accumulates the -f/-l groups in command-line order;
	// nonDeflatedFiles holds -s paths that must be stored uncompressed.
	fArgs            fileArgs
	nonDeflatedFiles = make(uniqueSet)

	cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file")
	traceFile  = flag.String("trace", "", "write trace to file")
)
145
// init registers the flag.Value-based flags (-l, -f, -s), which need
// custom parsing beyond the plain flag types.
func init() {
	flag.Var(&listFiles{}, "l", "file containing list of .class files")
	flag.Var(&file{}, "f", "file to include in zip")
	flag.Var(&nonDeflatedFiles, "s", "file path to be stored within the zip without compression")
}
151
// usage prints the command synopsis and flag defaults to stderr, then
// exits with status 2.
func usage() {
	fmt.Fprint(os.Stderr, "usage: soong_zip -o zipfile [-m manifest] -C dir [-f|-l file]...\n")
	flag.PrintDefaults()
	os.Exit(2)
}
157
// zipWriter drives the zip-writing pipeline: writeFile queues write
// operations (in input order) onto writeOps while compression runs on
// parallel goroutines bounded by rateLimit; write drains the queue and
// streams results sequentially to the output file.
type zipWriter struct {
	// time is the fixed modification timestamp stamped on every entry,
	// so output is deterministic.
	time        time.Time
	// createdDirs records directory entries already emitted so each is
	// written at most once.
	createdDirs map[string]bool
	// directories controls whether directory entries are written at all.
	directories bool

	// errors receives failures from worker goroutines.
	errors   chan error
	// writeOps is a FIFO of per-entry channels; the order of sends
	// defines the order of entries in the output zip.
	writeOps chan chan *zipEntry

	// rateLimit bounds the number of parallel compressions and the
	// total size of outstanding buffers.
	rateLimit *RateLimit

	// compressorPool recycles *flate.Writer values between blocks
	// (dictionary-less only: Reset can't install a new dictionary).
	compressorPool sync.Pool
	compLevel      int
}

// zipEntry is one entry to write: its header plus a channel of future
// readers that will deliver the entry's bytes, chunk by chunk, in order.
type zipEntry struct {
	fh *zip.FileHeader

	// List of delayed io.Reader; nil means the entry has no content
	// (e.g. a directory).
	futureReaders chan chan io.Reader
}
178
// main parses flags, optionally starts CPU profiling and execution
// tracing, expands the -f/-l arguments into dest->src path mappings, and
// writes the zip.
func main() {
	flag.Parse()

	if *cpuProfile != "" {
		f, err := os.Create(*cpuProfile)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
		defer f.Close()
		pprof.StartCPUProfile(f)
		// Deferred so the profile is flushed before the file closes.
		defer pprof.StopCPUProfile()
	}

	if *traceFile != "" {
		f, err := os.Create(*traceFile)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
		defer f.Close()
		err = trace.Start(f)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
		defer trace.Stop()
	}

	if *out == "" {
		fmt.Fprintf(os.Stderr, "error: -o is required\n")
		usage()
	}

	// Every entry shares one fixed timestamp so output is deterministic.
	w := &zipWriter{
		time:        time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC),
		createdDirs: make(map[string]bool),
		directories: *directories,
		compLevel:   *compLevel,
	}

	// Expand every -f/-l group into individual dest->src mappings,
	// rejecting two sources that map to the same destination.
	pathMappings := []pathMapping{}
	set := make(map[string]string)

	for _, fa := range fArgs {
		for _, src := range fa.sourceFiles {
			if err := fillPathPairs(fa.pathPrefixInZip,
				fa.sourcePrefixToStrip, src, set, &pathMappings); err != nil {
				log.Fatal(err)
			}
		}
	}

	err := w.write(*out, pathMappings, *manifest)
	if err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}
}
238
239func fillPathPairs(prefix, rel, src string, set map[string]string, pathMappings *[]pathMapping) error {
240	src = strings.TrimSpace(src)
241	if src == "" {
242		return nil
243	}
244	src = filepath.Clean(src)
245	dest, err := filepath.Rel(rel, src)
246	if err != nil {
247		return err
248	}
249	dest = filepath.Join(prefix, dest)
250
251	if _, found := set[dest]; found {
252		return fmt.Errorf("found two file paths to be copied into dest path: %q,"+
253			" both [%q]%q and [%q]%q!",
254			dest, dest, src, dest, set[dest])
255	} else {
256		set[dest] = src
257	}
258
259	zipMethod := zip.Deflate
260	if _, found := nonDeflatedFiles[dest]; found {
261		zipMethod = zip.Store
262	}
263	*pathMappings = append(*pathMappings,
264		pathMapping{dest: dest, src: src, zipMethod: zipMethod})
265
266	return nil
267}
268
269func (z *zipWriter) write(out string, pathMappings []pathMapping, manifest string) error {
270	f, err := os.Create(out)
271	if err != nil {
272		return err
273	}
274
275	defer f.Close()
276	defer func() {
277		if err != nil {
278			os.Remove(out)
279		}
280	}()
281
282	z.errors = make(chan error)
283	defer close(z.errors)
284
285	// This channel size can be essentially unlimited -- it's used as a fifo
286	// queue decouple the CPU and IO loads. Directories don't require any
287	// compression time, but still cost some IO. Similar with small files that
288	// can be very fast to compress. Some files that are more difficult to
289	// compress won't take a corresponding longer time writing out.
290	//
291	// The optimum size here depends on your CPU and IO characteristics, and
292	// the the layout of your zip file. 1000 was chosen mostly at random as
293	// something that worked reasonably well for a test file.
294	//
295	// The RateLimit object will put the upper bounds on the number of
296	// parallel compressions and outstanding buffers.
297	z.writeOps = make(chan chan *zipEntry, 1000)
298	z.rateLimit = NewRateLimit(*parallelJobs, 0)
299	defer z.rateLimit.Stop()
300
301	go func() {
302		var err error
303		defer close(z.writeOps)
304
305		for _, ele := range pathMappings {
306			err = z.writeFile(ele.dest, ele.src, ele.zipMethod)
307			if err != nil {
308				z.errors <- err
309				return
310			}
311		}
312
313		if manifest != "" {
314			err = z.writeFile("META-INF/MANIFEST.MF", manifest, zip.Deflate)
315			if err != nil {
316				z.errors <- err
317				return
318			}
319		}
320	}()
321
322	zipw := zip.NewWriter(f)
323
324	var currentWriteOpChan chan *zipEntry
325	var currentWriter io.WriteCloser
326	var currentReaders chan chan io.Reader
327	var currentReader chan io.Reader
328	var done bool
329
330	for !done {
331		var writeOpsChan chan chan *zipEntry
332		var writeOpChan chan *zipEntry
333		var readersChan chan chan io.Reader
334
335		if currentReader != nil {
336			// Only read and process errors
337		} else if currentReaders != nil {
338			readersChan = currentReaders
339		} else if currentWriteOpChan != nil {
340			writeOpChan = currentWriteOpChan
341		} else {
342			writeOpsChan = z.writeOps
343		}
344
345		select {
346		case writeOp, ok := <-writeOpsChan:
347			if !ok {
348				done = true
349			}
350
351			currentWriteOpChan = writeOp
352
353		case op := <-writeOpChan:
354			currentWriteOpChan = nil
355
356			if op.fh.Method == zip.Deflate {
357				currentWriter, err = zipw.CreateCompressedHeader(op.fh)
358			} else {
359				var zw io.Writer
360				zw, err = zipw.CreateHeader(op.fh)
361				currentWriter = nopCloser{zw}
362			}
363			if err != nil {
364				return err
365			}
366
367			currentReaders = op.futureReaders
368			if op.futureReaders == nil {
369				currentWriter.Close()
370				currentWriter = nil
371			}
372
373		case futureReader, ok := <-readersChan:
374			if !ok {
375				// Done with reading
376				currentWriter.Close()
377				currentWriter = nil
378				currentReaders = nil
379			}
380
381			currentReader = futureReader
382
383		case reader := <-currentReader:
384			var count int64
385			count, err = io.Copy(currentWriter, reader)
386			if err != nil {
387				return err
388			}
389			z.rateLimit.Release(int(count))
390
391			currentReader = nil
392
393		case err = <-z.errors:
394			return err
395		}
396	}
397
398	// One last chance to catch an error
399	select {
400	case err = <-z.errors:
401		return err
402	default:
403		zipw.Close()
404		return nil
405	}
406}
407
408func (z *zipWriter) writeFile(dest, src string, method uint16) error {
409	var fileSize int64
410	var executable bool
411
412	if s, err := os.Lstat(src); err != nil {
413		return err
414	} else if s.IsDir() {
415		if z.directories {
416			return z.writeDirectory(dest)
417		}
418		return nil
419	} else if s.Mode()&os.ModeSymlink != 0 {
420		return z.writeSymlink(dest, src)
421	} else if !s.Mode().IsRegular() {
422		return fmt.Errorf("%s is not a file, directory, or symlink", src)
423	} else {
424		fileSize = s.Size()
425		executable = s.Mode()&0100 != 0
426	}
427
428	if z.directories {
429		dir, _ := filepath.Split(dest)
430		err := z.writeDirectory(dir)
431		if err != nil {
432			return err
433		}
434	}
435
436	compressChan := make(chan *zipEntry, 1)
437	z.writeOps <- compressChan
438
439	// Pre-fill a zipEntry, it will be sent in the compressChan once
440	// we're sure about the Method and CRC.
441	ze := &zipEntry{
442		fh: &zip.FileHeader{
443			Name:   dest,
444			Method: method,
445
446			UncompressedSize64: uint64(fileSize),
447		},
448	}
449	ze.fh.SetModTime(z.time)
450	if executable {
451		ze.fh.SetMode(0700)
452	}
453
454	r, err := os.Open(src)
455	if err != nil {
456		return err
457	}
458
459	exec := z.rateLimit.RequestExecution()
460
461	if method == zip.Deflate && fileSize >= minParallelFileSize {
462		wg := new(sync.WaitGroup)
463
464		// Allocate enough buffer to hold all readers. We'll limit
465		// this based on actual buffer sizes in RateLimit.
466		ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1)
467
468		// Calculate the CRC in the background, since reading the entire
469		// file could take a while.
470		//
471		// We could split this up into chuncks as well, but it's faster
472		// than the compression. Due to the Go Zip API, we also need to
473		// know the result before we can begin writing the compressed
474		// data out to the zipfile.
475		wg.Add(1)
476		go z.crcFile(r, ze, exec, compressChan, wg)
477
478		for start := int64(0); start < fileSize; start += parallelBlockSize {
479			sr := io.NewSectionReader(r, start, parallelBlockSize)
480			resultChan := make(chan io.Reader, 1)
481			ze.futureReaders <- resultChan
482
483			exec := z.rateLimit.RequestExecution()
484
485			last := !(start+parallelBlockSize < fileSize)
486			var dict []byte
487			if start >= windowSize {
488				dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize))
489			}
490
491			wg.Add(1)
492			go z.compressPartialFile(sr, dict, last, exec, resultChan, wg)
493		}
494
495		close(ze.futureReaders)
496
497		// Close the file handle after all readers are done
498		go func(wg *sync.WaitGroup, f *os.File) {
499			wg.Wait()
500			f.Close()
501		}(wg, r)
502	} else {
503		go z.compressWholeFile(ze, r, exec, compressChan)
504	}
505
506	return nil
507}
508
509func (z *zipWriter) crcFile(r io.Reader, ze *zipEntry, exec Execution, resultChan chan *zipEntry, wg *sync.WaitGroup) {
510	defer wg.Done()
511	defer exec.Finish(0)
512
513	crc := crc32.NewIEEE()
514	_, err := io.Copy(crc, r)
515	if err != nil {
516		z.errors <- err
517		return
518	}
519
520	ze.fh.CRC32 = crc.Sum32()
521	resultChan <- ze
522	close(resultChan)
523}
524
525func (z *zipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, exec Execution, resultChan chan io.Reader, wg *sync.WaitGroup) {
526	defer wg.Done()
527
528	result, err := z.compressBlock(r, dict, last)
529	if err != nil {
530		z.errors <- err
531		return
532	}
533
534	exec.Finish(result.Len())
535	resultChan <- result
536}
537
538func (z *zipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) {
539	buf := new(bytes.Buffer)
540	var fw *flate.Writer
541	var err error
542	if len(dict) > 0 {
543		// There's no way to Reset a Writer with a new dictionary, so
544		// don't use the Pool
545		fw, err = flate.NewWriterDict(buf, z.compLevel, dict)
546	} else {
547		var ok bool
548		if fw, ok = z.compressorPool.Get().(*flate.Writer); ok {
549			fw.Reset(buf)
550		} else {
551			fw, err = flate.NewWriter(buf, z.compLevel)
552		}
553		defer z.compressorPool.Put(fw)
554	}
555	if err != nil {
556		return nil, err
557	}
558
559	_, err = io.Copy(fw, r)
560	if err != nil {
561		return nil, err
562	}
563	if last {
564		fw.Close()
565	} else {
566		fw.Flush()
567	}
568
569	return buf, nil
570}
571
// compressWholeFile handles one file on a single goroutine: it computes
// the CRC in a first pass, deflates the whole file in a second pass
// (falling back to Store when deflate doesn't actually shrink it), and
// publishes the completed entry on compressChan. Closes r when done.
func (z *zipWriter) compressWholeFile(ze *zipEntry, r *os.File, exec Execution, compressChan chan *zipEntry) {
	var bufSize int

	defer r.Close()

	// First pass: CRC32, which the zip API needs before data is written.
	crc := crc32.NewIEEE()
	_, err := io.Copy(crc, r)
	if err != nil {
		z.errors <- err
		return
	}

	ze.fh.CRC32 = crc.Sum32()

	// Rewind for the compression pass.
	_, err = r.Seek(0, 0)
	if err != nil {
		z.errors <- err
		return
	}

	// readFile rewinds r and reads it whole; used for the Store path.
	readFile := func(r *os.File) ([]byte, error) {
		_, err = r.Seek(0, 0)
		if err != nil {
			return nil, err
		}

		buf, err := ioutil.ReadAll(r)
		if err != nil {
			return nil, err
		}

		return buf, nil
	}

	// A single future reader delivers the entire payload to the writer
	// loop.
	ze.futureReaders = make(chan chan io.Reader, 1)
	futureReader := make(chan io.Reader, 1)
	ze.futureReaders <- futureReader
	close(ze.futureReaders)

	if ze.fh.Method == zip.Deflate {
		compressed, err := z.compressBlock(r, nil, true)
		if err != nil {
			z.errors <- err
			return
		}
		// Keep the deflated form only if it is actually smaller;
		// otherwise store the raw bytes instead.
		if uint64(compressed.Len()) < ze.fh.UncompressedSize64 {
			futureReader <- compressed
			bufSize = compressed.Len()
		} else {
			buf, err := readFile(r)
			if err != nil {
				z.errors <- err
				return
			}
			ze.fh.Method = zip.Store
			futureReader <- bytes.NewReader(buf)
			bufSize = int(ze.fh.UncompressedSize64)
		}
	} else {
		buf, err := readFile(r)
		if err != nil {
			z.errors <- err
			return
		}
		ze.fh.Method = zip.Store
		futureReader <- bytes.NewReader(buf)
		bufSize = int(ze.fh.UncompressedSize64)
	}

	// Report the buffered size to the rate limiter; the writer loop
	// releases it again after copying the data out.
	exec.Finish(bufSize)
	close(futureReader)

	compressChan <- ze
	close(compressChan)
}
647
648func (z *zipWriter) writeDirectory(dir string) error {
649	if dir != "" && !strings.HasSuffix(dir, "/") {
650		dir = dir + "/"
651	}
652
653	for dir != "" && dir != "./" && !z.createdDirs[dir] {
654		z.createdDirs[dir] = true
655
656		dirHeader := &zip.FileHeader{
657			Name: dir,
658		}
659		dirHeader.SetMode(0700 | os.ModeDir)
660		dirHeader.SetModTime(z.time)
661
662		ze := make(chan *zipEntry, 1)
663		ze <- &zipEntry{
664			fh: dirHeader,
665		}
666		close(ze)
667		z.writeOps <- ze
668
669		dir, _ = filepath.Split(dir)
670	}
671
672	return nil
673}
674
// writeSymlink queues a zip entry for the symlink at file, stored at rel
// inside the zip. The link target string becomes the entry's contents,
// with the symlink mode bit set in the header.
func (z *zipWriter) writeSymlink(rel, file string) error {
	if z.directories {
		dir, _ := filepath.Split(rel)
		if err := z.writeDirectory(dir); err != nil {
			return err
		}
	}

	fileHeader := &zip.FileHeader{
		Name: rel,
	}
	fileHeader.SetModTime(z.time)
	fileHeader.SetMode(0700 | os.ModeSymlink)

	dest, err := os.Readlink(file)
	if err != nil {
		return err
	}

	// The payload is just the link target, delivered through a single
	// pre-filled future reader.
	ze := make(chan *zipEntry, 1)
	futureReaders := make(chan chan io.Reader, 1)
	futureReader := make(chan io.Reader, 1)
	futureReaders <- futureReader
	close(futureReaders)
	futureReader <- bytes.NewBufferString(dest)
	close(futureReader)

	// We didn't ask permission to execute, since this should be very short
	// but we still need to increment the outstanding buffer sizes, since
	// the read will decrement the buffer size.
	z.rateLimit.Release(-len(dest))

	ze <- &zipEntry{
		fh:            fileHeader,
		futureReaders: futureReaders,
	}
	close(ze)
	z.writeOps <- ze

	return nil
}
716