// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"bytes"
	"compress/flate"
	"flag"
	"fmt"
	"hash/crc32"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"runtime/pprof"
	"runtime/trace"
	"strings"
	"sync"
	"time"

	"android/soong/third_party/zip"
)

// Block size used during parallel compression of a single file.
const parallelBlockSize = 1 * 1024 * 1024 // 1MB

// Minimum file size to use parallel compression. Parallel compression
// requires more flate.Writer allocations, since a flate.Writer's
// dictionary can't be changed during Reset.
const minParallelFileSize = parallelBlockSize * 6

// Size of the ZIP compression window (32KB).
const windowSize = 32 * 1024
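
// How the parallel pipeline below fits together: large files are split
// into parallelBlockSize blocks, each compressed on its own goroutine
// with the preceding 32KB of input as a flate dictionary, so the
// compression ratio doesn't suffer at block boundaries. Non-final blocks
// end with a sync flush, so the resulting byte-aligned fragments can
// simply be concatenated into one valid DEFLATE stream. The CRC is
// computed on a separate goroutine while the blocks compress.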

type nopCloser struct {
	io.Writer
}

func (nopCloser) Close() error {
	return nil
}

type fileArg struct {
	rootPrefix, relativeRoot, file string
}

type pathMapping struct {
	dest, src string
}

type fileArgs []fileArg

func (l *fileArgs) String() string {
	return `""`
}

func (l *fileArgs) Set(s string) error {
	if *relativeRoot == "" {
		return fmt.Errorf("must pass -C before -f or -l")
	}

	*l = append(*l,
		fileArg{rootPrefix: filepath.Clean(*rootPrefix),
			relativeRoot: filepath.Clean(*relativeRoot),
			file:         s})
	return nil
}

func (l *fileArgs) Get() interface{} {
	return l
}

var (
	out          = flag.String("o", "", "file to write zip file to")
	manifest     = flag.String("m", "", "input jar manifest file name")
	directories  = flag.Bool("d", false, "include directories in zip")
	rootPrefix   = flag.String("P", "", "path prefix within the zip at which to place files")
	relativeRoot = flag.String("C", "", "path to use as relative root of files in next -f or -l argument")
	parallelJobs = flag.Int("j", runtime.NumCPU(), "number of parallel threads to use")
	compLevel    = flag.Int("L", 5, "deflate compression level (0-9)")

	listFiles fileArgs
	files     fileArgs

	cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file")
	traceFile  = flag.String("trace", "", "write trace to file")
)

func init() {
	flag.Var(&listFiles, "l", "file containing list of .class files")
	flag.Var(&files, "f", "file to include in zip")
}

func usage() {
	fmt.Fprintf(os.Stderr, "usage: soong_zip -o zipfile [-m manifest] -C dir [-f|-l file]...\n")
	flag.PrintDefaults()
	os.Exit(2)
}
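
// Example invocation (hypothetical paths, for illustration only): zip up
// the contents of out/classes, plus everything named in a
// newline-separated list file, rooted at out/classes within the archive:
//
//   soong_zip -o out/foo.zip -C out/classes -f out/classes/Foo.class -l out/class_list.txt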

type zipWriter struct {
	time        time.Time
	createdDirs map[string]bool
	directories bool

	errors   chan error
	writeOps chan chan *zipEntry

	rateLimit *RateLimit

	compressorPool sync.Pool
	compLevel      int
}

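// A zipEntry travels through zipWriter.writeOps as a channel-of-channels:
// the outer writeOps channel preserves enqueue order, while each inner
// channel lets a compressor deliver its finished entry asynchronously.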
type zipEntry struct {
	fh *zip.FileHeader

	// Ordered list of delayed io.Readers, one per compressed data block.
	futureReaders chan chan io.Reader
}

func main() {
	flag.Parse()

	if *cpuProfile != "" {
		f, err := os.Create(*cpuProfile)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
		defer f.Close()
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	}

	if *traceFile != "" {
		f, err := os.Create(*traceFile)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
		defer f.Close()
		err = trace.Start(f)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
		defer trace.Stop()
	}

	if *out == "" {
		fmt.Fprintf(os.Stderr, "error: -o is required\n")
		usage()
	}

	w := &zipWriter{
		time:        time.Date(2009, 1, 1, 0, 0, 0, 0, time.UTC),
		createdDirs: make(map[string]bool),
		directories: *directories,
		compLevel:   *compLevel,
	}

	pathMappings := []pathMapping{}
	set := make(map[string]string)

	// Load each -l list file, which names additional files to include.
	for _, l := range listFiles {
		list, err := ioutil.ReadFile(l.file)
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
		srcs := strings.Split(string(list), "\n")
		for _, src := range srcs {
			if err := fillPathPairs(l.rootPrefix, l.relativeRoot, src,
				set, &pathMappings); err != nil {
				log.Fatal(err)
			}
		}
	}

	// Also include the files given directly with -f.
	for _, f := range files {
		if err := fillPathPairs(f.rootPrefix, f.relativeRoot,
			f.file, set, &pathMappings); err != nil {
			log.Fatal(err)
		}
	}

	err := w.write(*out, pathMappings, *manifest)
	if err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}
}

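// fillPathPairs computes the in-zip destination for src: src is made
// relative to rel (the -C root) and joined to prefix (-P). For example
// (hypothetical paths), with -C out -P classes, src out/a/B.class maps
// to classes/a/B.class.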
func fillPathPairs(prefix, rel, src string, set map[string]string, pathMappings *[]pathMapping) error {
	src = strings.TrimSpace(src)
	if src == "" {
		return nil
	}
	src = filepath.Clean(src)
	dest, err := filepath.Rel(rel, src)
	if err != nil {
		return err
	}
	dest = filepath.Join(prefix, dest)

	if prev, found := set[dest]; found {
		return fmt.Errorf("destination path %q has two source files: %q and %q",
			dest, src, prev)
	}
	set[dest] = src

	*pathMappings = append(*pathMappings, pathMapping{dest: dest, src: src})

	return nil
}

func (z *zipWriter) write(out string, pathMappings []pathMapping, manifest string) error {
	f, err := os.Create(out)
	if err != nil {
		return err
	}

	defer f.Close()
	defer func() {
		if err != nil {
			os.Remove(out)
		}
	}()

	z.errors = make(chan error)
	defer close(z.errors)

	// This channel size can be essentially unlimited -- it's used as a fifo
	// queue to decouple the CPU and IO loads. Directories don't require any
	// compression time, but still cost some IO. The same goes for small
	// files, which can be very fast to compress. Conversely, files that are
	// more difficult to compress don't take a correspondingly longer time
	// to write out.
	//
	// The optimum size here depends on your CPU and IO characteristics, and
	// the layout of your zip file. 1000 was chosen mostly at random as
	// something that worked reasonably well for a test file.
	//
	// The RateLimit object puts upper bounds on the number of parallel
	// compressions and outstanding buffers.
	z.writeOps = make(chan chan *zipEntry, 1000)
	z.rateLimit = NewRateLimit(*parallelJobs, 0)
	defer z.rateLimit.Stop()
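
	// Note on RateLimit, which is defined elsewhere in this package: as
	// used here, it caps the number of concurrent compressions at
	// *parallelJobs and accounts for finished-but-unwritten buffer bytes.
	// exec.Finish(n) records n buffered bytes when a job completes, and
	// Release(n) returns them once the writer has drained them.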

	go func() {
		var err error
		defer close(z.writeOps)

		for _, ele := range pathMappings {
			err = z.writeFile(ele.dest, ele.src)
			if err != nil {
				z.errors <- err
				return
			}
		}

		if manifest != "" {
			err = z.writeFile("META-INF/MANIFEST.MF", manifest)
			if err != nil {
				z.errors <- err
				return
			}
		}
	}()

	zipw := zip.NewWriter(f)

	var currentWriteOpChan chan *zipEntry
	var currentWriter io.WriteCloser
	var currentReaders chan chan io.Reader
	var currentReader chan io.Reader
	var done bool

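	// Process exactly one pending input per iteration, in priority order:
	// the current block's data, then the current entry's next block
	// channel, then the current entry itself, then the next write op.
	// Arming only one case at a time keeps entries in enqueue order even
	// though compression finishes out of order; z.errors is always armed.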
	for !done {
		var writeOpsChan chan chan *zipEntry
		var writeOpChan chan *zipEntry
		var readersChan chan chan io.Reader

		if currentReader != nil {
			// Only read and process errors
		} else if currentReaders != nil {
			readersChan = currentReaders
		} else if currentWriteOpChan != nil {
			writeOpChan = currentWriteOpChan
		} else {
			writeOpsChan = z.writeOps
		}

		select {
		case writeOp, ok := <-writeOpsChan:
			if !ok {
				done = true
			}

			currentWriteOpChan = writeOp

		case op := <-writeOpChan:
			currentWriteOpChan = nil

			if op.fh.Method == zip.Deflate {
				currentWriter, err = zipw.CreateCompressedHeader(op.fh)
			} else {
				var zw io.Writer
				zw, err = zipw.CreateHeader(op.fh)
				currentWriter = nopCloser{zw}
			}
			if err != nil {
				return err
			}

			currentReaders = op.futureReaders
			if op.futureReaders == nil {
				currentWriter.Close()
				currentWriter = nil
			}

		case futureReader, ok := <-readersChan:
			if !ok {
				// Done with reading
				currentWriter.Close()
				currentWriter = nil
				currentReaders = nil
			}

			currentReader = futureReader

		case reader := <-currentReader:
			var count int64
			count, err = io.Copy(currentWriter, reader)
			if err != nil {
				return err
			}
			z.rateLimit.Release(int(count))

			currentReader = nil

		case err = <-z.errors:
			return err
		}
	}

	// One last chance to catch an error
	select {
	case err = <-z.errors:
		return err
	default:
		return zipw.Close()
	}
}

func (z *zipWriter) writeFile(dest, src string) error {
	var fileSize int64
	var executable bool

	if s, err := os.Lstat(src); err != nil {
		return err
	} else if s.IsDir() {
		if z.directories {
			return z.writeDirectory(dest)
		}
		return nil
	} else if s.Mode()&os.ModeSymlink != 0 {
		return z.writeSymlink(dest, src)
	} else if !s.Mode().IsRegular() {
		return fmt.Errorf("%s is not a file, directory, or symlink", src)
	} else {
		fileSize = s.Size()
		executable = s.Mode()&0100 != 0
	}

	if z.directories {
		dir, _ := filepath.Split(dest)
		err := z.writeDirectory(dir)
		if err != nil {
			return err
		}
	}

	compressChan := make(chan *zipEntry, 1)
	z.writeOps <- compressChan

	// Pre-fill a zipEntry; it will be sent on compressChan once the
	// Method and CRC are known.
	ze := &zipEntry{
		fh: &zip.FileHeader{
			Name:   dest,
			Method: zip.Deflate,

			UncompressedSize64: uint64(fileSize),
		},
	}
	ze.fh.SetModTime(z.time)
	if executable {
		ze.fh.SetMode(0700)
	}

	r, err := os.Open(src)
	if err != nil {
		return err
	}

	exec := z.rateLimit.RequestExecution()

	if fileSize >= minParallelFileSize {
		wg := new(sync.WaitGroup)

		// Allocate enough channel slots to hold all the block readers;
		// the actual buffer memory is bounded by RateLimit.
		ze.futureReaders = make(chan chan io.Reader, (fileSize/parallelBlockSize)+1)

		// Calculate the CRC in the background, since reading the entire
		// file could take a while.
		//
		// We could split this up into chunks as well, but CRC computation
		// is faster than the compression. Due to the Go Zip API, we also
		// need to know the result before we can begin writing the
		// compressed data out to the zip file.
		wg.Add(1)
		go z.crcFile(r, ze, exec, compressChan, wg)

		for start := int64(0); start < fileSize; start += parallelBlockSize {
			sr := io.NewSectionReader(r, start, parallelBlockSize)
			resultChan := make(chan io.Reader, 1)
			ze.futureReaders <- resultChan

			exec := z.rateLimit.RequestExecution()

			last := start+parallelBlockSize >= fileSize
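			// Prime this block's compressor with the 32KB of input that
			// precedes it, so the compression ratio doesn't suffer at
			// block boundaries.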
			var dict []byte
			if start >= windowSize {
				dict, err = ioutil.ReadAll(io.NewSectionReader(r, start-windowSize, windowSize))
				if err != nil {
					return err
				}
			}

			wg.Add(1)
			go z.compressPartialFile(sr, dict, last, exec, resultChan, wg)
		}

		close(ze.futureReaders)

		// Close the file handle after all readers are done
		go func(wg *sync.WaitGroup, f *os.File) {
			wg.Wait()
			f.Close()
		}(wg, r)
	} else {
		go z.compressWholeFile(ze, r, exec, compressChan)
	}

	return nil
}

func (z *zipWriter) crcFile(r io.Reader, ze *zipEntry, exec Execution, resultChan chan *zipEntry, wg *sync.WaitGroup) {
	defer wg.Done()
	defer exec.Finish(0)

	crc := crc32.NewIEEE()
	_, err := io.Copy(crc, r)
	if err != nil {
		z.errors <- err
		return
	}

	ze.fh.CRC32 = crc.Sum32()
	resultChan <- ze
	close(resultChan)
}

func (z *zipWriter) compressPartialFile(r io.Reader, dict []byte, last bool, exec Execution, resultChan chan io.Reader, wg *sync.WaitGroup) {
	defer wg.Done()

	result, err := z.compressBlock(r, dict, last)
	if err != nil {
		z.errors <- err
		return
	}

	exec.Finish(result.Len())
	resultChan <- result
}

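// compressBlock produces one raw DEFLATE fragment. Non-final blocks end
// with a sync flush (Flush), which pads the stream to a byte boundary,
// so consecutive fragments can simply be concatenated; the final block
// is Closed instead, to emit the stream terminator.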
func (z *zipWriter) compressBlock(r io.Reader, dict []byte, last bool) (*bytes.Buffer, error) {
	buf := new(bytes.Buffer)
	var fw *flate.Writer
	var err error
	if len(dict) > 0 {
		// There's no way to Reset a Writer with a new dictionary, so
		// don't use the Pool
		fw, err = flate.NewWriterDict(buf, z.compLevel, dict)
		if err != nil {
			return nil, err
		}
	} else {
		var ok bool
		if fw, ok = z.compressorPool.Get().(*flate.Writer); ok {
			fw.Reset(buf)
		} else {
			fw, err = flate.NewWriter(buf, z.compLevel)
			if err != nil {
				return nil, err
			}
		}
		defer z.compressorPool.Put(fw)
	}

	_, err = io.Copy(fw, r)
	if err != nil {
		return nil, err
	}
	if last {
		fw.Close()
	} else {
		fw.Flush()
	}

	return buf, nil
}

func (z *zipWriter) compressWholeFile(ze *zipEntry, r *os.File, exec Execution, compressChan chan *zipEntry) {
	var bufSize int

	defer r.Close()

	crc := crc32.NewIEEE()
	_, err := io.Copy(crc, r)
	if err != nil {
		z.errors <- err
		return
	}

	ze.fh.CRC32 = crc.Sum32()

	_, err = r.Seek(0, 0)
	if err != nil {
		z.errors <- err
		return
	}

	compressed, err := z.compressBlock(r, nil, true)
	if err != nil {
		z.errors <- err
		return
	}

	ze.futureReaders = make(chan chan io.Reader, 1)
	futureReader := make(chan io.Reader, 1)
	ze.futureReaders <- futureReader
	close(ze.futureReaders)

	// Store the file uncompressed if compression didn't shrink it.
	if uint64(compressed.Len()) < ze.fh.UncompressedSize64 {
		futureReader <- compressed
		bufSize = compressed.Len()
	} else {
		_, err = r.Seek(0, 0)
		if err != nil {
			z.errors <- err
			return
		}

		buf, err := ioutil.ReadAll(r)
		if err != nil {
			z.errors <- err
			return
		}

		ze.fh.Method = zip.Store
		futureReader <- bytes.NewReader(buf)
		bufSize = int(ze.fh.UncompressedSize64)
	}
	exec.Finish(bufSize)
	close(futureReader)

	compressChan <- ze
	close(compressChan)
}

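// writeDirectory queues directory entries for dir and each of its not yet
// created parents, walking up the path one component at a time.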
func (z *zipWriter) writeDirectory(dir string) error {
	if dir != "" && !strings.HasSuffix(dir, "/") {
		dir = dir + "/"
	}

	for dir != "" && dir != "./" && !z.createdDirs[dir] {
		z.createdDirs[dir] = true

		dirHeader := &zip.FileHeader{
			Name: dir,
		}
		dirHeader.SetMode(0700 | os.ModeDir)
		dirHeader.SetModTime(z.time)

		ze := make(chan *zipEntry, 1)
		ze <- &zipEntry{
			fh: dirHeader,
		}
		close(ze)
		z.writeOps <- ze

		dir, _ = filepath.Split(dir)
	}

	return nil
}

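// writeSymlink stores a symlink the way zip tools conventionally do: as an
// entry whose contents are the link target, marked with os.ModeSymlink in
// its Unix mode bits.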
func (z *zipWriter) writeSymlink(rel, file string) error {
	if z.directories {
		dir, _ := filepath.Split(rel)
		if err := z.writeDirectory(dir); err != nil {
			return err
		}
	}

	fileHeader := &zip.FileHeader{
		Name: rel,
	}
	fileHeader.SetModTime(z.time)
	fileHeader.SetMode(0700 | os.ModeSymlink)

	dest, err := os.Readlink(file)
	if err != nil {
		return err
	}

	ze := make(chan *zipEntry, 1)
	futureReaders := make(chan chan io.Reader, 1)
	futureReader := make(chan io.Reader, 1)
	futureReaders <- futureReader
	close(futureReaders)
	futureReader <- bytes.NewBufferString(dest)
	close(futureReader)

	// We didn't request an execution slot, since writing a symlink is
	// quick, but the writer will still Release the bytes it copies out of
	// this entry, so pre-charge the outstanding buffer accounting with the
	// negative amount to keep it balanced.
	z.rateLimit.Release(-len(dest))

	ze <- &zipEntry{
		fh:            fileHeader,
		futureReaders: futureReaders,
	}
	close(ze)
	z.writeOps <- ze

	return nil
}