1/* Copyright 2014 Google LLC 2 3Redistribution and use in source and binary forms, with or without 4modification, are permitted provided that the following conditions are 5met: 6 7 * Redistributions of source code must retain the above copyright 8notice, this list of conditions and the following disclaimer. 9 * Redistributions in binary form must reproduce the above 10copyright notice, this list of conditions and the following disclaimer 11in the documentation and/or other materials provided with the 12distribution. 13 * Neither the name of Google LLC nor the names of its 14contributors may be used to endorse or promote products derived from 15this software without specific prior written permission. 16 17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28*/ 29 30/* 31Tool upload_system_symbols generates and uploads Breakpad symbol files for OS X system libraries. 32 33This tool shells out to the dump_syms and symupload Breakpad tools. In its default mode, this 34will find all dynamic libraries on the system, run dump_syms to create the Breakpad symbol files, 35and then upload them to Google's crash infrastructure. 36 37The tool can also be used to only dump libraries or upload from a directory. See -help for more 38information. 39 40Both i386 and x86_64 architectures will be dumped and uploaded. 41*/ 42package main 43 44import ( 45 "debug/macho" 46 "flag" 47 "fmt" 48 "io" 49 "io/ioutil" 50 "log" 51 "os" 52 "os/exec" 53 "path" 54 "regexp" 55 "strings" 56 "sync" 57 "time" 58) 59 60var ( 61 breakpadTools = flag.String("breakpad-tools", "out/Release/", "Path to the Breakpad tools directory, containing dump_syms and symupload.") 62 uploadOnlyPath = flag.String("upload-from", "", "Upload a directory of symbol files that has been dumped independently.") 63 dumpOnlyPath = flag.String("dump-to", "", "Dump the symbols to the specified directory, but do not upload them.") 64 systemRoot = flag.String("system-root", "", "Path to the root of the Mac OS X system whose symbols will be dumped.") 65 dumpArchitecture = flag.String("arch", "", "The CPU architecture for which symbols should be dumped. If not specified, dumps all architectures.") 66 apiKey = flag.String("api-key", "", "API key to use. If this is present, the `sym-upload-v2` protocol is used.\nSee https://chromium.googlesource.com/breakpad/breakpad/+/HEAD/docs/sym_upload_v2_protocol.md or\n`symupload`'s help for more information.") 67) 68 69var ( 70 // pathsToScan are the subpaths in the systemRoot that should be scanned for shared libraries. 71 pathsToScan = []string{ 72 "/System/Library/Frameworks", 73 "/System/Library/PrivateFrameworks", 74 "/usr/lib", 75 } 76 77 // optionalPathsToScan is just like pathsToScan, but the paths are permitted to be absent. 78 optionalPathsToScan = []string{ 79 // Gone in 10.15. 80 "/Library/QuickTime", 81 // Not present in dumped dyld_shared_caches 82 "/System/Library/Components", 83 } 84 85 // uploadServersV1 are the list of servers to which symbols should be 86 // uploaded when using the V1 protocol. 87 uploadServersV1 = []string{ 88 "https://clients2.google.com/cr/symbol", 89 "https://clients2.google.com/cr/staging_symbol", 90 } 91 // uploadServersV2 are the list of servers to which symbols should be 92 // uploaded when using the V2 protocol. 93 uploadServersV2 = []string{ 94 "https://staging-crashsymbolcollector-pa.googleapis.com", 95 "https://prod-crashsymbolcollector-pa.googleapis.com", 96 } 97 98 // uploadServers are the list of servers that should be used, accounting 99 // for whether v1 or v2 protocol is used. 100 uploadServers = uploadServersV1 101 102 // blacklistRegexps match paths that should be excluded from dumping. 103 blacklistRegexps = []*regexp.Regexp{ 104 regexp.MustCompile(`/System/Library/Frameworks/Python\.framework/`), 105 regexp.MustCompile(`/System/Library/Frameworks/Ruby\.framework/`), 106 regexp.MustCompile(`_profile\.dylib$`), 107 regexp.MustCompile(`_debug\.dylib$`), 108 regexp.MustCompile(`\.a$`), 109 regexp.MustCompile(`\.dat$`), 110 } 111) 112 113func main() { 114 flag.Parse() 115 log.SetFlags(0) 116 117 // If `apiKey` is set, we're using the v2 protocol. 118 if len(*apiKey) > 0 { 119 uploadServers = uploadServersV2 120 } 121 122 var uq *UploadQueue 123 124 if *uploadOnlyPath != "" { 125 // -upload-from specified, so handle that case early. 126 uq = StartUploadQueue() 127 uploadFromDirectory(*uploadOnlyPath, uq) 128 uq.Wait() 129 return 130 } 131 132 if *systemRoot == "" { 133 log.Fatal("Need a -system-root to dump symbols for") 134 } 135 136 if *dumpOnlyPath != "" { 137 // -dump-to specified, so make sure that the path is a directory. 138 if fi, err := os.Stat(*dumpOnlyPath); err != nil { 139 log.Fatalf("-dump-to location: %v", err) 140 } else if !fi.IsDir() { 141 log.Fatal("-dump-to location is not a directory") 142 } 143 } 144 145 dumpPath := *dumpOnlyPath 146 if *dumpOnlyPath == "" { 147 // If -dump-to was not specified, then run the upload pipeline and create 148 // a temporary dump output directory. 149 uq = StartUploadQueue() 150 151 if p, err := ioutil.TempDir("", "upload_system_symbols"); err != nil { 152 log.Fatalf("Failed to create temporary directory: %v", err) 153 } else { 154 dumpPath = p 155 defer os.RemoveAll(p) 156 } 157 } 158 159 dq := StartDumpQueue(*systemRoot, dumpPath, uq) 160 dq.Wait() 161 if uq != nil { 162 uq.Wait() 163 } 164} 165 166// manglePath reduces an absolute filesystem path to a string suitable as the 167// base for a file name which encodes some of the original path. The result 168// concatenates the leading initial from each path component except the last to 169// the last path component; for example /System/Library/Frameworks/AppKit 170// becomes SLFAppKit. 171// Assumes ASCII. 172func manglePath(path string) string { 173 components := strings.Split(path, "/") 174 n := len(components) 175 builder := strings.Builder{} 176 for i, component := range components { 177 if len(component) == 0 { 178 continue 179 } 180 if i < n-1 { 181 builder.WriteString(component[:1]) 182 } else { 183 builder.WriteString(component) 184 } 185 } 186 return builder.String() 187} 188 189type WorkerPool struct { 190 wg sync.WaitGroup 191} 192 193// StartWorkerPool will launch numWorkers goroutines all running workerFunc. 194// When workerFunc exits, the goroutine will terminate. 195func StartWorkerPool(numWorkers int, workerFunc func()) *WorkerPool { 196 p := new(WorkerPool) 197 for i := 0; i < numWorkers; i++ { 198 p.wg.Add(1) 199 go func() { 200 workerFunc() 201 p.wg.Done() 202 }() 203 } 204 return p 205} 206 207// Wait for all the workers in the pool to complete the workerFunc. 208func (p *WorkerPool) Wait() { 209 p.wg.Wait() 210} 211 212type UploadQueue struct { 213 *WorkerPool 214 queue chan string 215} 216 217// StartUploadQueue creates a new worker pool and queue, to which paths to 218// Breakpad symbol files may be sent for uploading. 219func StartUploadQueue() *UploadQueue { 220 uq := &UploadQueue{ 221 queue: make(chan string, 10), 222 } 223 uq.WorkerPool = StartWorkerPool(5, uq.worker) 224 return uq 225} 226 227// Upload enqueues the contents of filepath to be uploaded. 228func (uq *UploadQueue) Upload(filepath string) { 229 uq.queue <- filepath 230} 231 232// Done tells the queue that no more files need to be uploaded. This must be 233// called before WorkerPool.Wait. 234func (uq *UploadQueue) Done() { 235 close(uq.queue) 236} 237 238func (uq *UploadQueue) runSymUpload(symfile, server string) *exec.Cmd { 239 symUpload := path.Join(*breakpadTools, "symupload") 240 args := []string{symfile, server} 241 if len(*apiKey) > 0 { 242 args = append([]string{"-p", "sym-upload-v2", "-k", *apiKey}, args...) 243 } 244 return exec.Command(symUpload, args...) 245} 246 247func (uq *UploadQueue) worker() { 248 for symfile := range uq.queue { 249 for _, server := range uploadServers { 250 for i := 0; i < 3; i++ { // Give each upload 3 attempts to succeed. 251 cmd := uq.runSymUpload(symfile, server) 252 if output, err := cmd.Output(); err == nil { 253 // Success. No retry needed. 254 fmt.Printf("Uploaded %s to %s\n", symfile, server) 255 break 256 } else if exitError, ok := err.(*exec.ExitError); ok && exitError.ExitCode() == 2 && *apiKey != "" { 257 // Exit code 2 in protocol v2 means the file already exists on the server. 258 // No point retrying. 259 fmt.Printf("File %s already exists on %s\n", symfile, server) 260 break 261 } else { 262 log.Printf("Error running symupload(%s, %s), attempt %d: %v: %s\n", symfile, server, i, err, output) 263 time.Sleep(1 * time.Second) 264 } 265 } 266 } 267 } 268} 269 270type DumpQueue struct { 271 *WorkerPool 272 dumpPath string 273 queue chan dumpRequest 274 uq *UploadQueue 275} 276 277type dumpRequest struct { 278 path string 279 arch string 280} 281 282// StartDumpQueue creates a new worker pool to find all the Mach-O libraries in 283// root and dump their symbols to dumpPath. If an UploadQueue is passed, the 284// path to the symbol file will be enqueued there, too. 285func StartDumpQueue(root, dumpPath string, uq *UploadQueue) *DumpQueue { 286 dq := &DumpQueue{ 287 dumpPath: dumpPath, 288 queue: make(chan dumpRequest), 289 uq: uq, 290 } 291 dq.WorkerPool = StartWorkerPool(12, dq.worker) 292 293 findLibsInRoot(root, dq) 294 295 return dq 296} 297 298// DumpSymbols enqueues the filepath to have its symbols dumped in the specified 299// architecture. 300func (dq *DumpQueue) DumpSymbols(filepath string, arch string) { 301 dq.queue <- dumpRequest{ 302 path: filepath, 303 arch: arch, 304 } 305} 306 307func (dq *DumpQueue) Wait() { 308 dq.WorkerPool.Wait() 309 if dq.uq != nil { 310 dq.uq.Done() 311 } 312} 313 314func (dq *DumpQueue) done() { 315 close(dq.queue) 316} 317 318func (dq *DumpQueue) worker() { 319 dumpSyms := path.Join(*breakpadTools, "dump_syms") 320 321 for req := range dq.queue { 322 filebase := path.Join(dq.dumpPath, manglePath(req.path)) 323 symfile := fmt.Sprintf("%s_%s.sym", filebase, req.arch) 324 f, err := os.Create(symfile) 325 if err != nil { 326 log.Fatalf("Error creating symbol file: %v", err) 327 } 328 329 cmd := exec.Command(dumpSyms, "-a", req.arch, req.path) 330 cmd.Stdout = f 331 err = cmd.Run() 332 f.Close() 333 334 if err != nil { 335 os.Remove(symfile) 336 log.Printf("Error running dump_syms(%s, %s): %v\n", req.arch, req.path, err) 337 } else if dq.uq != nil { 338 dq.uq.Upload(symfile) 339 } 340 } 341} 342 343// uploadFromDirectory handles the upload-only case and merely uploads all files in 344// a directory. 345func uploadFromDirectory(directory string, uq *UploadQueue) { 346 d, err := os.Open(directory) 347 if err != nil { 348 log.Fatalf("Could not open directory to upload: %v", err) 349 } 350 defer d.Close() 351 352 entries, err := d.Readdirnames(0) 353 if err != nil { 354 log.Fatalf("Could not read directory: %v", err) 355 } 356 357 for _, entry := range entries { 358 uq.Upload(path.Join(directory, entry)) 359 } 360 361 uq.Done() 362} 363 364// findQueue is an implementation detail of the DumpQueue that finds all the 365// Mach-O files and their architectures. 366type findQueue struct { 367 *WorkerPool 368 queue chan string 369 dq *DumpQueue 370} 371 372// findLibsInRoot looks in all the pathsToScan in the root and manages the 373// interaction between findQueue and DumpQueue. 374func findLibsInRoot(root string, dq *DumpQueue) { 375 fq := &findQueue{ 376 queue: make(chan string, 10), 377 dq: dq, 378 } 379 fq.WorkerPool = StartWorkerPool(12, fq.worker) 380 381 for _, p := range pathsToScan { 382 fq.findLibsInPath(path.Join(root, p), true) 383 } 384 385 for _, p := range optionalPathsToScan { 386 fq.findLibsInPath(path.Join(root, p), false) 387 } 388 389 close(fq.queue) 390 fq.Wait() 391 dq.done() 392} 393 394// findLibsInPath recursively walks the directory tree, sending file paths to 395// test for being Mach-O to the findQueue. 396func (fq *findQueue) findLibsInPath(loc string, mustExist bool) { 397 d, err := os.Open(loc) 398 if err != nil { 399 if !mustExist && os.IsNotExist(err) { 400 return 401 } 402 log.Fatalf("Could not open %s: %v", loc, err) 403 } 404 defer d.Close() 405 406 for { 407 fis, err := d.Readdir(100) 408 if err != nil && err != io.EOF { 409 log.Fatalf("Error reading directory %s: %v", loc, err) 410 } 411 412 for _, fi := range fis { 413 fp := path.Join(loc, fi.Name()) 414 if fi.IsDir() { 415 fq.findLibsInPath(fp, true) 416 continue 417 } else if fi.Mode()&os.ModeSymlink != 0 { 418 continue 419 } 420 421 // Test the blacklist in the worker to not slow down this main loop. 422 423 fq.queue <- fp 424 } 425 426 if err == io.EOF { 427 break 428 } 429 } 430} 431 432func (fq *findQueue) worker() { 433 for fp := range fq.queue { 434 blacklisted := false 435 for _, re := range blacklistRegexps { 436 blacklisted = blacklisted || re.MatchString(fp) 437 } 438 if blacklisted { 439 continue 440 } 441 442 f, err := os.Open(fp) 443 if err != nil { 444 log.Printf("%s: %v", fp, err) 445 continue 446 } 447 448 fatFile, err := macho.NewFatFile(f) 449 if err == nil { 450 // The file is fat, so dump its architectures. 451 for _, fatArch := range fatFile.Arches { 452 fq.dumpMachOFile(fp, fatArch.File) 453 } 454 fatFile.Close() 455 } else if err == macho.ErrNotFat { 456 // The file isn't fat but may still be MachO. 457 thinFile, err := macho.NewFile(f) 458 if err != nil { 459 log.Printf("%s: %v", fp, err) 460 continue 461 } 462 fq.dumpMachOFile(fp, thinFile) 463 thinFile.Close() 464 } else { 465 f.Close() 466 } 467 } 468} 469 470func (fq *findQueue) dumpMachOFile(fp string, image *macho.File) { 471 if image.Type != MachODylib && image.Type != MachOBundle && image.Type != MachODylinker { 472 return 473 } 474 475 arch := getArchStringFromHeader(image.FileHeader) 476 if arch == "" { 477 // Don't know about this architecture type. 478 return 479 } 480 481 if (*dumpArchitecture != "" && *dumpArchitecture == arch) || *dumpArchitecture == "" { 482 fq.dq.DumpSymbols(fp, arch) 483 } 484} 485