1// Copyright 2017 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package main 16 17import ( 18 "errors" 19 "flag" 20 "fmt" 21 "hash/crc32" 22 "io" 23 "io/ioutil" 24 "log" 25 "os" 26 "path/filepath" 27 "sort" 28 29 "github.com/google/blueprint/pathtools" 30 31 "android/soong/jar" 32 "android/soong/third_party/zip" 33) 34 35type fileList []string 36 37func (f *fileList) String() string { 38 return `""` 39} 40 41func (f *fileList) Set(name string) error { 42 *f = append(*f, filepath.Clean(name)) 43 44 return nil 45} 46 47type zipsToNotStripSet map[string]bool 48 49func (s zipsToNotStripSet) String() string { 50 return `""` 51} 52 53func (s zipsToNotStripSet) Set(zip_path string) error { 54 s[zip_path] = true 55 56 return nil 57} 58 59var ( 60 sortEntries = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)") 61 emulateJar = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)") 62 emulatePar = flag.Bool("p", false, "merge zip entries based on par format") 63 stripDirs fileList 64 stripFiles fileList 65 zipsToNotStrip = make(zipsToNotStripSet) 66 stripDirEntries = flag.Bool("D", false, "strip directory entries from the output zip file") 67 manifest = flag.String("m", "", "manifest file to insert in jar") 68 pyMain = flag.String("pm", "", "__main__.py file to insert in par") 69 prefix = flag.String("prefix", "", "A file to prefix to the zip file") 70 ignoreDuplicates = flag.Bool("ignore-duplicates", false, "take each entry from the first zip it exists in and don't warn") 71) 72 73func init() { 74 flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards") 75 flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards") 76 flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping") 77} 78 79func main() { 80 flag.Usage = func() { 81 fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] output [inputs...]") 82 flag.PrintDefaults() 83 } 84 85 // parse args 86 flag.Parse() 87 args := flag.Args() 88 if len(args) < 1 { 89 flag.Usage() 90 os.Exit(1) 91 } 92 outputPath := args[0] 93 inputs := args[1:] 94 95 log.SetFlags(log.Lshortfile) 96 97 // make writer 98 output, err := os.Create(outputPath) 99 if err != nil { 100 log.Fatal(err) 101 } 102 defer output.Close() 103 104 var offset int64 105 if *prefix != "" { 106 prefixFile, err := os.Open(*prefix) 107 if err != nil { 108 log.Fatal(err) 109 } 110 offset, err = io.Copy(output, prefixFile) 111 if err != nil { 112 log.Fatal(err) 113 } 114 } 115 116 writer := zip.NewWriter(output) 117 defer func() { 118 err := writer.Close() 119 if err != nil { 120 log.Fatal(err) 121 } 122 }() 123 writer.SetOffset(offset) 124 125 // make readers 126 readers := []namedZipReader{} 127 for _, input := range inputs { 128 reader, err := zip.OpenReader(input) 129 if err != nil { 130 log.Fatal(err) 131 } 132 defer reader.Close() 133 namedReader := namedZipReader{path: input, reader: &reader.Reader} 134 readers = append(readers, namedReader) 135 } 136 137 if *manifest != "" && !*emulateJar { 138 log.Fatal(errors.New("must specify -j when specifying a manifest via -m")) 139 } 140 141 if *pyMain != "" && !*emulatePar { 142 log.Fatal(errors.New("must specify -p when specifying a Python __main__.py via -pm")) 143 } 144 145 // do merge 146 err = mergeZips(readers, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar, 147 *stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip)) 148 if err != nil { 149 log.Fatal(err) 150 } 151} 152 153// a namedZipReader reads a .zip file and can say which file it's reading 154type namedZipReader struct { 155 path string 156 reader *zip.Reader 157} 158 159// a zipEntryPath refers to a file contained in a zip 160type zipEntryPath struct { 161 zipName string 162 entryName string 163} 164 165func (p zipEntryPath) String() string { 166 return p.zipName + "/" + p.entryName 167} 168 169// a zipEntry is a zipSource that pulls its content from another zip 170type zipEntry struct { 171 path zipEntryPath 172 content *zip.File 173} 174 175func (ze zipEntry) String() string { 176 return ze.path.String() 177} 178 179func (ze zipEntry) IsDir() bool { 180 return ze.content.FileInfo().IsDir() 181} 182 183func (ze zipEntry) CRC32() uint32 { 184 return ze.content.FileHeader.CRC32 185} 186 187func (ze zipEntry) Size() uint64 { 188 return ze.content.FileHeader.UncompressedSize64 189} 190 191func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error { 192 return zw.CopyFrom(ze.content, dest) 193} 194 195// a bufferEntry is a zipSource that pulls its content from a []byte 196type bufferEntry struct { 197 fh *zip.FileHeader 198 content []byte 199} 200 201func (be bufferEntry) String() string { 202 return "internal buffer" 203} 204 205func (be bufferEntry) IsDir() bool { 206 return be.fh.FileInfo().IsDir() 207} 208 209func (be bufferEntry) CRC32() uint32 { 210 return crc32.ChecksumIEEE(be.content) 211} 212 213func (be bufferEntry) Size() uint64 { 214 return uint64(len(be.content)) 215} 216 217func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error { 218 w, err := zw.CreateHeader(be.fh) 219 if err != nil { 220 return err 221 } 222 223 if !be.IsDir() { 224 _, err = w.Write(be.content) 225 if err != nil { 226 return err 227 } 228 } 229 230 return nil 231} 232 233type zipSource interface { 234 String() string 235 IsDir() bool 236 CRC32() uint32 237 Size() uint64 238 WriteToZip(dest string, zw *zip.Writer) error 239} 240 241// a fileMapping specifies to copy a zip entry from one place to another 242type fileMapping struct { 243 dest string 244 source zipSource 245} 246 247func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, pyMain string, 248 sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool, 249 stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error { 250 251 sourceByDest := make(map[string]zipSource, 0) 252 orderedMappings := []fileMapping{} 253 254 // if dest already exists returns a non-null zipSource for the existing source 255 addMapping := func(dest string, source zipSource) zipSource { 256 mapKey := filepath.Clean(dest) 257 if existingSource, exists := sourceByDest[mapKey]; exists { 258 return existingSource 259 } 260 261 sourceByDest[mapKey] = source 262 orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest}) 263 return nil 264 } 265 266 if manifest != "" { 267 if !stripDirEntries { 268 dirHeader := jar.MetaDirFileHeader() 269 dirSource := bufferEntry{dirHeader, nil} 270 addMapping(jar.MetaDir, dirSource) 271 } 272 273 contents, err := ioutil.ReadFile(manifest) 274 if err != nil { 275 return err 276 } 277 278 fh, buf, err := jar.ManifestFileContents(contents) 279 if err != nil { 280 return err 281 } 282 283 fileSource := bufferEntry{fh, buf} 284 addMapping(jar.ManifestFile, fileSource) 285 } 286 287 if pyMain != "" { 288 buf, err := ioutil.ReadFile(pyMain) 289 if err != nil { 290 return err 291 } 292 fh := &zip.FileHeader{ 293 Name: "__main__.py", 294 Method: zip.Store, 295 UncompressedSize64: uint64(len(buf)), 296 } 297 fh.SetMode(0700) 298 fh.SetModTime(jar.DefaultTime) 299 fileSource := bufferEntry{fh, buf} 300 addMapping("__main__.py", fileSource) 301 } 302 303 if emulatePar { 304 // the runfiles packages needs to be populated with "__init__.py". 305 newPyPkgs := []string{} 306 // the runfiles dirs have been treated as packages. 307 existingPyPkgSet := make(map[string]bool) 308 // put existing __init__.py files to a set first. This set is used for preventing 309 // generated __init__.py files from overwriting existing ones. 310 for _, namedReader := range readers { 311 for _, file := range namedReader.reader.File { 312 if filepath.Base(file.Name) != "__init__.py" { 313 continue 314 } 315 pyPkg := pathBeforeLastSlash(file.Name) 316 if _, found := existingPyPkgSet[pyPkg]; found { 317 panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name)) 318 } else { 319 existingPyPkgSet[pyPkg] = true 320 } 321 } 322 } 323 for _, namedReader := range readers { 324 for _, file := range namedReader.reader.File { 325 var parentPath string /* the path after trimming last "/" */ 326 if filepath.Base(file.Name) == "__init__.py" { 327 // for existing __init__.py files, we should trim last "/" for twice. 328 // eg. a/b/c/__init__.py ---> a/b 329 parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name)) 330 } else { 331 parentPath = pathBeforeLastSlash(file.Name) 332 } 333 populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs) 334 } 335 } 336 for _, pkg := range newPyPkgs { 337 var emptyBuf []byte 338 fh := &zip.FileHeader{ 339 Name: filepath.Join(pkg, "__init__.py"), 340 Method: zip.Store, 341 UncompressedSize64: uint64(len(emptyBuf)), 342 } 343 fh.SetMode(0700) 344 fh.SetModTime(jar.DefaultTime) 345 fileSource := bufferEntry{fh, emptyBuf} 346 addMapping(filepath.Join(pkg, "__init__.py"), fileSource) 347 } 348 } 349 for _, namedReader := range readers { 350 _, skipStripThisZip := zipsToNotStrip[namedReader.path] 351 for _, file := range namedReader.reader.File { 352 if !skipStripThisZip { 353 if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil { 354 return err 355 } else if skip { 356 continue 357 } 358 } 359 360 if stripDirEntries && file.FileInfo().IsDir() { 361 continue 362 } 363 364 // check for other files or directories destined for the same path 365 dest := file.Name 366 367 // make a new entry to add 368 source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file} 369 370 if existingSource := addMapping(dest, source); existingSource != nil { 371 // handle duplicates 372 if existingSource.IsDir() != source.IsDir() { 373 return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n", 374 dest, existingSource, source) 375 } 376 377 if ignoreDuplicates { 378 continue 379 } 380 381 if emulateJar && 382 file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass { 383 // Skip manifest and module info files that are not from the first input file 384 continue 385 } 386 387 if source.IsDir() { 388 continue 389 } 390 391 if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() { 392 continue 393 } 394 395 return fmt.Errorf("Duplicate path %v found in %v and %v\n", 396 dest, existingSource, source) 397 } 398 } 399 } 400 401 if emulateJar { 402 jarSort(orderedMappings) 403 } else if sortEntries { 404 alphanumericSort(orderedMappings) 405 } 406 407 for _, entry := range orderedMappings { 408 if err := entry.source.WriteToZip(entry.dest, writer); err != nil { 409 return err 410 } 411 } 412 413 return nil 414} 415 416// Sets the given directory and all its ancestor directories as Python packages. 417func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) { 418 for pkgPath != "" { 419 if _, found := existingPyPkgSet[pkgPath]; !found { 420 existingPyPkgSet[pkgPath] = true 421 *newPyPkgs = append(*newPyPkgs, pkgPath) 422 // Gets its ancestor directory by trimming last slash. 423 pkgPath = pathBeforeLastSlash(pkgPath) 424 } else { 425 break 426 } 427 } 428} 429 430func pathBeforeLastSlash(path string) string { 431 ret := filepath.Dir(path) 432 // filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/". 433 if ret == "." || ret == "/" { 434 return "" 435 } 436 return ret 437} 438 439func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) { 440 for _, dir := range stripDirs { 441 dir = filepath.Clean(dir) 442 patterns := []string{ 443 dir + "/", // the directory itself 444 dir + "/**/*", // files recursively in the directory 445 dir + "/**/*/", // directories recursively in the directory 446 } 447 448 for _, pattern := range patterns { 449 match, err := pathtools.Match(pattern, name) 450 if err != nil { 451 return false, fmt.Errorf("%s: %s", err.Error(), pattern) 452 } else if match { 453 if emulateJar { 454 // When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is 455 // requested. 456 // TODO(ccross): which files does this affect? 457 if name != jar.MetaDir && name != jar.ManifestFile { 458 return true, nil 459 } 460 } 461 return true, nil 462 } 463 } 464 } 465 466 for _, pattern := range stripFiles { 467 if match, err := pathtools.Match(pattern, name); err != nil { 468 return false, fmt.Errorf("%s: %s", err.Error(), pattern) 469 } else if match { 470 return true, nil 471 } 472 } 473 return false, nil 474} 475 476func jarSort(files []fileMapping) { 477 sort.SliceStable(files, func(i, j int) bool { 478 return jar.EntryNamesLess(files[i].dest, files[j].dest) 479 }) 480} 481 482func alphanumericSort(files []fileMapping) { 483 sort.SliceStable(files, func(i, j int) bool { 484 return files[i].dest < files[j].dest 485 }) 486} 487