1// Copyright 2017 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package finder 16 17import ( 18 "bufio" 19 "bytes" 20 "encoding/json" 21 "errors" 22 "fmt" 23 "io" 24 "os" 25 "path/filepath" 26 "runtime" 27 "sort" 28 "strings" 29 "sync" 30 "sync/atomic" 31 "time" 32 33 "android/soong/finder/fs" 34) 35 36// This file provides a Finder struct that can quickly search for files satisfying 37// certain criteria. 38// This Finder gets its speed partially from parallelism and partially from caching. 39// If a Stat call returns the same result as last time, then it means Finder 40// can skip the ReadDir call for that dir. 41 42// The primary data structure used by the finder is the field Finder.nodes , 43// which is a tree of nodes of type *pathMap . 44// Each node represents a directory on disk, along with its stats, subdirectories, 45// and contained files. 46 47// The common use case for the Finder is that the caller creates a Finder and gives 48// it the same query that was given to it in the previous execution. 49// In this situation, the major events that take place are: 50// 1. The Finder begins to load its db 51// 2. The Finder begins to stat the directories mentioned in its db (using multiple threads) 52// Calling Stat on each of these directories is generally a large fraction of the total time 53// 3. The Finder begins to construct a separate tree of nodes in each of its threads 54// 4. The Finder merges the individual node trees into the main node tree 55// 5. The Finder may call ReadDir a few times if there are a few directories that are out-of-date 56// These ReadDir calls might prompt additional Stat calls, etc 57// 6. The Finder waits for all loading to complete 58// 7. The Finder searches the cache for files matching the user's query (using multiple threads) 59 60// These are the invariants regarding concurrency: 61// 1. The public methods of Finder are threadsafe. 62// The public methods are only performance-optimized for one caller at a time, however. 63// For the moment, multiple concurrent callers shouldn't expect any better performance than 64// multiple serial callers. 65// 2. While building the node tree, only one thread may ever access the <children> collection of a 66// *pathMap at once. 67// a) The thread that accesses the <children> collection is the thread that discovers the 68// children (by reading them from the cache or by having received a response to ReadDir). 69// 1) Consequently, the thread that discovers the children also spawns requests to stat 70// subdirs. 71// b) Consequently, while building the node tree, no thread may do a lookup of its 72// *pathMap via filepath because another thread may be adding children to the 73// <children> collection of an ancestor node. Additionally, in rare cases, another thread 74// may be removing children from an ancestor node if the children were only discovered to 75// be irrelevant after calling ReadDir (which happens if a prune-file was just added). 76// 3. No query will begin to be serviced until all loading (both reading the db 77// and scanning the filesystem) is complete. 78// Tests indicate that it only takes about 10% as long to search the in-memory cache as to 79// generate it, making this not a huge loss in performance. 80// 4. The parsing of the db and the initial setup of the pathMap tree must complete before 81// beginning to call listDirSync (because listDirSync can create new entries in the pathMap) 82 83// see cmd/finder.go or finder_test.go for usage examples 84 85// Update versionString whenever making a backwards-incompatible change to the cache file format 86const versionString = "Android finder version 1" 87 88// a CacheParams specifies which files and directories the user wishes be scanned and 89// potentially added to the cache 90type CacheParams struct { 91 // WorkingDirectory is used as a base for any relative file paths given to the Finder 92 WorkingDirectory string 93 94 // RootDirs are the root directories used to initiate the search 95 RootDirs []string 96 97 // ExcludeDirs are directory names that if encountered are removed from the search 98 ExcludeDirs []string 99 100 // PruneFiles are file names that if encountered prune their entire directory 101 // (including siblings) 102 PruneFiles []string 103 104 // IncludeFiles are file names to include as matches 105 IncludeFiles []string 106} 107 108// a cacheConfig stores the inputs that determine what should be included in the cache 109type cacheConfig struct { 110 CacheParams 111 112 // FilesystemView is a unique identifier telling which parts of which file systems 113 // are readable by the Finder. In practice its value is essentially username@hostname. 114 // FilesystemView is set to ensure that a cache file copied to another host or 115 // found by another user doesn't inadvertently get reused. 116 FilesystemView string 117} 118 119func (p *cacheConfig) Dump() ([]byte, error) { 120 bytes, err := json.Marshal(p) 121 return bytes, err 122} 123 124// a cacheMetadata stores version information about the cache 125type cacheMetadata struct { 126 // The Version enables the Finder to determine whether it can even parse the file 127 // If the version changes, the entire cache file must be regenerated 128 Version string 129 130 // The CacheParams enables the Finder to determine whether the parameters match 131 // If the CacheParams change, the Finder can choose how much of the cache file to reuse 132 // (although in practice, the Finder will probably choose to ignore the entire file anyway) 133 Config cacheConfig 134} 135 136type Logger interface { 137 Output(calldepth int, s string) error 138} 139 140// the Finder is the main struct that callers will want to use 141type Finder struct { 142 // configuration 143 DbPath string 144 numDbLoadingThreads int 145 numSearchingThreads int 146 cacheMetadata cacheMetadata 147 logger Logger 148 filesystem fs.FileSystem 149 150 // temporary state 151 threadPool *threadPool 152 mutex sync.Mutex 153 fsErrs []fsErr 154 errlock sync.Mutex 155 shutdownWaitgroup sync.WaitGroup 156 157 // non-temporary state 158 modifiedFlag int32 159 nodes pathMap 160} 161 162var defaultNumThreads = runtime.NumCPU() * 2 163 164// New creates a new Finder for use 165func New(cacheParams CacheParams, filesystem fs.FileSystem, 166 logger Logger, dbPath string) (f *Finder, err error) { 167 return newImpl(cacheParams, filesystem, logger, dbPath, defaultNumThreads) 168} 169 170// newImpl is like New but accepts more params 171func newImpl(cacheParams CacheParams, filesystem fs.FileSystem, 172 logger Logger, dbPath string, numThreads int) (f *Finder, err error) { 173 numDbLoadingThreads := numThreads 174 numSearchingThreads := numThreads 175 176 metadata := cacheMetadata{ 177 Version: versionString, 178 Config: cacheConfig{ 179 CacheParams: cacheParams, 180 FilesystemView: filesystem.ViewId(), 181 }, 182 } 183 184 f = &Finder{ 185 numDbLoadingThreads: numDbLoadingThreads, 186 numSearchingThreads: numSearchingThreads, 187 cacheMetadata: metadata, 188 logger: logger, 189 filesystem: filesystem, 190 191 nodes: *newPathMap("/"), 192 DbPath: dbPath, 193 194 shutdownWaitgroup: sync.WaitGroup{}, 195 } 196 197 f.loadFromFilesystem() 198 199 // check for any filesystem errors 200 err = f.getErr() 201 if err != nil { 202 return nil, err 203 } 204 205 // confirm that every path mentioned in the CacheConfig exists 206 for _, path := range cacheParams.RootDirs { 207 if !filepath.IsAbs(path) { 208 path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path) 209 } 210 node := f.nodes.GetNode(filepath.Clean(path), false) 211 if node == nil || node.ModTime == 0 { 212 return nil, fmt.Errorf("path %v was specified to be included in the cache but does not exist\n", path) 213 } 214 } 215 216 return f, nil 217} 218 219// FindNamed searches for every cached file 220func (f *Finder) FindAll() []string { 221 return f.FindAt("/") 222} 223 224// FindNamed searches for every cached file under <rootDir> 225func (f *Finder) FindAt(rootDir string) []string { 226 filter := func(entries DirEntries) (dirNames []string, fileNames []string) { 227 return entries.DirNames, entries.FileNames 228 } 229 return f.FindMatching(rootDir, filter) 230} 231 232// FindNamed searches for every cached file named <fileName> 233func (f *Finder) FindNamed(fileName string) []string { 234 return f.FindNamedAt("/", fileName) 235} 236 237// FindNamedAt searches under <rootPath> for every file named <fileName> 238// The reason a caller might use FindNamedAt instead of FindNamed is if they want 239// to limit their search to a subset of the cache 240func (f *Finder) FindNamedAt(rootPath string, fileName string) []string { 241 filter := func(entries DirEntries) (dirNames []string, fileNames []string) { 242 matches := []string{} 243 for _, foundName := range entries.FileNames { 244 if foundName == fileName { 245 matches = append(matches, foundName) 246 } 247 } 248 return entries.DirNames, matches 249 250 } 251 return f.FindMatching(rootPath, filter) 252} 253 254// FindFirstNamed searches for every file named <fileName> 255// Whenever it finds a match, it stops search subdirectories 256func (f *Finder) FindFirstNamed(fileName string) []string { 257 return f.FindFirstNamedAt("/", fileName) 258} 259 260// FindFirstNamedAt searches for every file named <fileName> 261// Whenever it finds a match, it stops search subdirectories 262func (f *Finder) FindFirstNamedAt(rootPath string, fileName string) []string { 263 filter := func(entries DirEntries) (dirNames []string, fileNames []string) { 264 matches := []string{} 265 for _, foundName := range entries.FileNames { 266 if foundName == fileName { 267 matches = append(matches, foundName) 268 } 269 } 270 271 if len(matches) > 0 { 272 return []string{}, matches 273 } 274 return entries.DirNames, matches 275 } 276 return f.FindMatching(rootPath, filter) 277} 278 279// FindMatching is the most general exported function for searching for files in the cache 280// The WalkFunc will be invoked repeatedly and is expected to modify the provided DirEntries 281// in place, removing file paths and directories as desired. 282// WalkFunc will be invoked potentially many times in parallel, and must be threadsafe. 283func (f *Finder) FindMatching(rootPath string, filter WalkFunc) []string { 284 // set up some parameters 285 scanStart := time.Now() 286 var isRel bool 287 workingDir := f.cacheMetadata.Config.WorkingDirectory 288 289 isRel = !filepath.IsAbs(rootPath) 290 if isRel { 291 rootPath = filepath.Join(workingDir, rootPath) 292 } 293 294 rootPath = filepath.Clean(rootPath) 295 296 // ensure nothing else is using the Finder 297 f.verbosef("FindMatching waiting for finder to be idle\n") 298 f.lock() 299 defer f.unlock() 300 301 node := f.nodes.GetNode(rootPath, false) 302 if node == nil { 303 f.verbosef("No data for path %v ; apparently not included in cache params: %v\n", 304 rootPath, f.cacheMetadata.Config.CacheParams) 305 // path is not found; don't do a search 306 return []string{} 307 } 308 309 // search for matching files 310 f.verbosef("Finder finding %v using cache\n", rootPath) 311 results := f.findInCacheMultithreaded(node, filter, f.numSearchingThreads) 312 313 // format and return results 314 if isRel { 315 for i := 0; i < len(results); i++ { 316 results[i] = strings.Replace(results[i], workingDir+"/", "", 1) 317 } 318 } 319 sort.Strings(results) 320 f.verbosef("Found %v files under %v in %v using cache\n", 321 len(results), rootPath, time.Since(scanStart)) 322 return results 323} 324 325// Shutdown declares that the finder is no longer needed and waits for its cleanup to complete 326// Currently, that only entails waiting for the database dump to complete. 327func (f *Finder) Shutdown() { 328 f.waitForDbDump() 329} 330 331// End of public api 332 333func (f *Finder) goDumpDb() { 334 if f.wasModified() { 335 f.shutdownWaitgroup.Add(1) 336 go func() { 337 err := f.dumpDb() 338 if err != nil { 339 f.verbosef("%v\n", err) 340 } 341 f.shutdownWaitgroup.Done() 342 }() 343 } else { 344 f.verbosef("Skipping dumping unmodified db\n") 345 } 346} 347 348func (f *Finder) waitForDbDump() { 349 f.shutdownWaitgroup.Wait() 350} 351 352// joinCleanPaths is like filepath.Join but is faster because 353// joinCleanPaths doesn't have to support paths ending in "/" or containing ".." 354func joinCleanPaths(base string, leaf string) string { 355 if base == "" { 356 return leaf 357 } 358 if base == "/" { 359 return base + leaf 360 } 361 if leaf == "" { 362 return base 363 } 364 return base + "/" + leaf 365} 366 367func (f *Finder) verbosef(format string, args ...interface{}) { 368 f.logger.Output(2, fmt.Sprintf(format, args...)) 369} 370 371// loadFromFilesystem populates the in-memory cache based on the contents of the filesystem 372func (f *Finder) loadFromFilesystem() { 373 f.threadPool = newThreadPool(f.numDbLoadingThreads) 374 375 err := f.startFromExternalCache() 376 if err != nil { 377 f.startWithoutExternalCache() 378 } 379 380 f.goDumpDb() 381 382 f.threadPool = nil 383} 384 385func (f *Finder) startFind(path string) { 386 if !filepath.IsAbs(path) { 387 path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path) 388 } 389 node := f.nodes.GetNode(path, true) 390 f.statDirAsync(node) 391} 392 393func (f *Finder) lock() { 394 f.mutex.Lock() 395} 396 397func (f *Finder) unlock() { 398 f.mutex.Unlock() 399} 400 401// a statResponse is the relevant portion of the response from the filesystem to a Stat call 402type statResponse struct { 403 ModTime int64 404 Inode uint64 405 Device uint64 406} 407 408// a pathAndStats stores a path and its stats 409type pathAndStats struct { 410 statResponse 411 412 Path string 413} 414 415// a dirFullInfo stores all of the relevant information we know about a directory 416type dirFullInfo struct { 417 pathAndStats 418 419 FileNames []string 420} 421 422// a PersistedDirInfo is the information about a dir that we save to our cache on disk 423type PersistedDirInfo struct { 424 // These field names are short because they are repeated many times in the output json file 425 P string // path 426 T int64 // modification time 427 I uint64 // inode number 428 F []string // relevant filenames contained 429} 430 431// a PersistedDirs is the information that we persist for a group of dirs 432type PersistedDirs struct { 433 // the device on which each directory is stored 434 Device uint64 435 // the common root path to which all contained dirs are relative 436 Root string 437 // the directories themselves 438 Dirs []PersistedDirInfo 439} 440 441// a CacheEntry is the smallest unit that can be read and parsed from the cache (on disk) at a time 442type CacheEntry []PersistedDirs 443 444// a DirEntries lists the files and directories contained directly within a specific directory 445type DirEntries struct { 446 Path string 447 448 // elements of DirNames are just the dir names; they don't include any '/' character 449 DirNames []string 450 // elements of FileNames are just the file names; they don't include '/' character 451 FileNames []string 452} 453 454// a WalkFunc is the type that is passed into various Find functions for determining which 455// directories the caller wishes be walked. The WalkFunc is expected to decide which 456// directories to walk and which files to consider as matches to the original query. 457type WalkFunc func(DirEntries) (dirs []string, files []string) 458 459// a mapNode stores the relevant stats about a directory to be stored in a pathMap 460type mapNode struct { 461 statResponse 462 FileNames []string 463} 464 465// a pathMap implements the directory tree structure of nodes 466type pathMap struct { 467 mapNode 468 469 path string 470 471 children map[string]*pathMap 472 473 // number of descendent nodes, including self 474 approximateNumDescendents int 475} 476 477func newPathMap(path string) *pathMap { 478 result := &pathMap{path: path, children: make(map[string]*pathMap, 4), 479 approximateNumDescendents: 1} 480 return result 481} 482 483// GetNode returns the node at <path> 484func (m *pathMap) GetNode(path string, createIfNotFound bool) *pathMap { 485 if len(path) > 0 && path[0] == '/' { 486 path = path[1:] 487 } 488 489 node := m 490 for { 491 if path == "" { 492 return node 493 } 494 495 index := strings.Index(path, "/") 496 var firstComponent string 497 if index >= 0 { 498 firstComponent = path[:index] 499 path = path[index+1:] 500 } else { 501 firstComponent = path 502 path = "" 503 } 504 505 child, found := node.children[firstComponent] 506 507 if !found { 508 if createIfNotFound { 509 child = node.newChild(firstComponent) 510 } else { 511 return nil 512 } 513 } 514 515 node = child 516 } 517} 518 519func (m *pathMap) newChild(name string) (child *pathMap) { 520 path := joinCleanPaths(m.path, name) 521 newChild := newPathMap(path) 522 m.children[name] = newChild 523 524 return m.children[name] 525} 526 527func (m *pathMap) UpdateNumDescendents() int { 528 count := 1 529 for _, child := range m.children { 530 count += child.approximateNumDescendents 531 } 532 m.approximateNumDescendents = count 533 return count 534} 535 536func (m *pathMap) UpdateNumDescendentsRecursive() { 537 for _, child := range m.children { 538 child.UpdateNumDescendentsRecursive() 539 } 540 m.UpdateNumDescendents() 541} 542 543func (m *pathMap) MergeIn(other *pathMap) { 544 for key, theirs := range other.children { 545 ours, found := m.children[key] 546 if found { 547 ours.MergeIn(theirs) 548 } else { 549 m.children[key] = theirs 550 } 551 } 552 if other.ModTime != 0 { 553 m.mapNode = other.mapNode 554 } 555 m.UpdateNumDescendents() 556} 557 558func (m *pathMap) DumpAll() []dirFullInfo { 559 results := []dirFullInfo{} 560 m.dumpInto("", &results) 561 return results 562} 563 564func (m *pathMap) dumpInto(path string, results *[]dirFullInfo) { 565 *results = append(*results, 566 dirFullInfo{ 567 pathAndStats{statResponse: m.statResponse, Path: path}, 568 m.FileNames}, 569 ) 570 for key, child := range m.children { 571 childPath := joinCleanPaths(path, key) 572 if len(childPath) == 0 || childPath[0] != '/' { 573 childPath = "/" + childPath 574 } 575 child.dumpInto(childPath, results) 576 } 577} 578 579// a semaphore can be locked by up to <capacity> callers at once 580type semaphore struct { 581 pool chan bool 582} 583 584func newSemaphore(capacity int) *semaphore { 585 return &semaphore{pool: make(chan bool, capacity)} 586} 587 588func (l *semaphore) Lock() { 589 l.pool <- true 590} 591 592func (l *semaphore) Unlock() { 593 <-l.pool 594} 595 596// A threadPool runs goroutines and supports throttling and waiting. 597// Without throttling, Go may exhaust the maximum number of various resources, such as 598// threads or file descriptors, and crash the program. 599type threadPool struct { 600 receivedRequests sync.WaitGroup 601 activeRequests semaphore 602} 603 604func newThreadPool(maxNumConcurrentThreads int) *threadPool { 605 return &threadPool{ 606 receivedRequests: sync.WaitGroup{}, 607 activeRequests: *newSemaphore(maxNumConcurrentThreads), 608 } 609} 610 611// Run requests to run the given function in its own goroutine 612func (p *threadPool) Run(function func()) { 613 p.receivedRequests.Add(1) 614 // If Run() was called from within a goroutine spawned by this threadPool, 615 // then we may need to return from Run() before having capacity to actually 616 // run <function>. 617 // 618 // It's possible that the body of <function> contains a statement (such as a syscall) 619 // that will cause Go to pin it to a thread, or will contain a statement that uses 620 // another resource that is in short supply (such as a file descriptor), so we can't 621 // actually run <function> until we have capacity. 622 // 623 // However, the semaphore used for synchronization is implemented via a channel and 624 // shouldn't require a new thread for each access. 625 go func() { 626 p.activeRequests.Lock() 627 function() 628 p.activeRequests.Unlock() 629 p.receivedRequests.Done() 630 }() 631} 632 633// Wait waits until all goroutines are done, just like sync.WaitGroup's Wait 634func (p *threadPool) Wait() { 635 p.receivedRequests.Wait() 636} 637 638type fsErr struct { 639 path string 640 err error 641} 642 643func (e fsErr) String() string { 644 return e.path + ": " + e.err.Error() 645} 646 647func (f *Finder) serializeCacheEntry(dirInfos []dirFullInfo) ([]byte, error) { 648 // group each dirFullInfo by its Device, to avoid having to repeat it in the output 649 dirsByDevice := map[uint64][]PersistedDirInfo{} 650 for _, entry := range dirInfos { 651 _, found := dirsByDevice[entry.Device] 652 if !found { 653 dirsByDevice[entry.Device] = []PersistedDirInfo{} 654 } 655 dirsByDevice[entry.Device] = append(dirsByDevice[entry.Device], 656 PersistedDirInfo{P: entry.Path, T: entry.ModTime, I: entry.Inode, F: entry.FileNames}) 657 } 658 659 cacheEntry := CacheEntry{} 660 661 for device, infos := range dirsByDevice { 662 // find common prefix 663 prefix := "" 664 if len(infos) > 0 { 665 prefix = infos[0].P 666 } 667 for _, info := range infos { 668 for !strings.HasPrefix(info.P+"/", prefix+"/") { 669 prefix = filepath.Dir(prefix) 670 if prefix == "/" { 671 break 672 } 673 } 674 } 675 // remove common prefix 676 for i := range infos { 677 suffix := strings.Replace(infos[i].P, prefix, "", 1) 678 if len(suffix) > 0 && suffix[0] == '/' { 679 suffix = suffix[1:] 680 } 681 infos[i].P = suffix 682 } 683 684 // turn the map (keyed by device) into a list of structs with labeled fields 685 // this is to improve readability of the output 686 cacheEntry = append(cacheEntry, PersistedDirs{Device: device, Root: prefix, Dirs: infos}) 687 } 688 689 // convert to json. 690 // it would save some space to use a different format than json for the db file, 691 // but the space and time savings are small, and json is easy for humans to read 692 bytes, err := json.Marshal(cacheEntry) 693 return bytes, err 694} 695 696func (f *Finder) parseCacheEntry(bytes []byte) ([]dirFullInfo, error) { 697 var cacheEntry CacheEntry 698 err := json.Unmarshal(bytes, &cacheEntry) 699 if err != nil { 700 return nil, err 701 } 702 703 // convert from a CacheEntry to a []dirFullInfo (by copying a few fields) 704 capacity := 0 705 for _, element := range cacheEntry { 706 capacity += len(element.Dirs) 707 } 708 nodes := make([]dirFullInfo, capacity) 709 count := 0 710 for _, element := range cacheEntry { 711 for _, dir := range element.Dirs { 712 path := joinCleanPaths(element.Root, dir.P) 713 714 nodes[count] = dirFullInfo{ 715 pathAndStats: pathAndStats{ 716 statResponse: statResponse{ 717 ModTime: dir.T, Inode: dir.I, Device: element.Device, 718 }, 719 Path: path}, 720 FileNames: dir.F} 721 count++ 722 } 723 } 724 return nodes, nil 725} 726 727// We use the following separator byte to distinguish individually parseable blocks of json 728// because we know this separator won't appear in the json that we're parsing. 729// 730// The newline byte can only appear in a UTF-8 stream if the newline character appears, because: 731// - The newline character is encoded as "0000 1010" in binary ("0a" in hex) 732// - UTF-8 dictates that bytes beginning with a "0" bit are never emitted as part of a multibyte 733// character. 734// 735// We know that the newline character will never appear in our json string, because: 736// - If a newline character appears as part of a data string, then json encoding will 737// emit two characters instead: '\' and 'n'. 738// - The json encoder that we use doesn't emit the optional newlines between any of its 739// other outputs. 740const lineSeparator = byte('\n') 741 742func (f *Finder) readLine(reader *bufio.Reader) ([]byte, error) { 743 return reader.ReadBytes(lineSeparator) 744} 745 746// validateCacheHeader reads the cache header from cacheReader and tells whether the cache is compatible with this Finder 747func (f *Finder) validateCacheHeader(cacheReader *bufio.Reader) bool { 748 cacheVersionBytes, err := f.readLine(cacheReader) 749 if err != nil { 750 f.verbosef("Failed to read database header; database is invalid\n") 751 return false 752 } 753 if len(cacheVersionBytes) > 0 && cacheVersionBytes[len(cacheVersionBytes)-1] == lineSeparator { 754 cacheVersionBytes = cacheVersionBytes[:len(cacheVersionBytes)-1] 755 } 756 cacheVersionString := string(cacheVersionBytes) 757 currentVersion := f.cacheMetadata.Version 758 if cacheVersionString != currentVersion { 759 f.verbosef("Version changed from %q to %q, database is not applicable\n", cacheVersionString, currentVersion) 760 return false 761 } 762 763 cacheParamBytes, err := f.readLine(cacheReader) 764 if err != nil { 765 f.verbosef("Failed to read database search params; database is invalid\n") 766 return false 767 } 768 769 if len(cacheParamBytes) > 0 && cacheParamBytes[len(cacheParamBytes)-1] == lineSeparator { 770 cacheParamBytes = cacheParamBytes[:len(cacheParamBytes)-1] 771 } 772 773 currentParamBytes, err := f.cacheMetadata.Config.Dump() 774 if err != nil { 775 panic("Finder failed to serialize its parameters") 776 } 777 cacheParamString := string(cacheParamBytes) 778 currentParamString := string(currentParamBytes) 779 if cacheParamString != currentParamString { 780 f.verbosef("Params changed from %q to %q, database is not applicable\n", cacheParamString, currentParamString) 781 return false 782 } 783 return true 784} 785 786// loadBytes compares the cache info in <data> to the state of the filesystem 787// loadBytes returns a map representing <data> and also a slice of dirs that need to be re-walked 788func (f *Finder) loadBytes(id int, data []byte) (m *pathMap, dirsToWalk []string, err error) { 789 790 helperStartTime := time.Now() 791 792 cachedNodes, err := f.parseCacheEntry(data) 793 if err != nil { 794 return nil, nil, fmt.Errorf("Failed to parse block %v: %v\n", id, err.Error()) 795 } 796 797 unmarshalDate := time.Now() 798 f.verbosef("Unmarshaled %v objects for %v in %v\n", 799 len(cachedNodes), id, unmarshalDate.Sub(helperStartTime)) 800 801 tempMap := newPathMap("/") 802 stats := make([]statResponse, len(cachedNodes)) 803 804 for i, node := range cachedNodes { 805 // check the file system for an updated timestamp 806 stats[i] = f.statDirSync(node.Path) 807 } 808 809 dirsToWalk = []string{} 810 for i, cachedNode := range cachedNodes { 811 updated := stats[i] 812 // save the cached value 813 container := tempMap.GetNode(cachedNode.Path, true) 814 container.mapNode = mapNode{statResponse: updated} 815 816 // if the metadata changed and the directory still exists, then 817 // make a note to walk it later 818 if !f.isInfoUpToDate(cachedNode.statResponse, updated) && updated.ModTime != 0 { 819 f.setModified() 820 // make a note that the directory needs to be walked 821 dirsToWalk = append(dirsToWalk, cachedNode.Path) 822 } else { 823 container.mapNode.FileNames = cachedNode.FileNames 824 } 825 } 826 // count the number of nodes to improve our understanding of the shape of the tree, 827 // thereby improving parallelism of subsequent searches 828 tempMap.UpdateNumDescendentsRecursive() 829 830 f.verbosef("Statted inodes of block %v in %v\n", id, time.Now().Sub(unmarshalDate)) 831 return tempMap, dirsToWalk, nil 832} 833 834// startFromExternalCache loads the cache database from disk 835// startFromExternalCache waits to return until the load of the cache db is complete, but 836// startFromExternalCache does not wait for all every listDir() or statDir() request to complete 837func (f *Finder) startFromExternalCache() (err error) { 838 startTime := time.Now() 839 dbPath := f.DbPath 840 841 // open cache file and validate its header 842 reader, err := f.filesystem.Open(dbPath) 843 if err != nil { 844 return errors.New("No data to load from database\n") 845 } 846 bufferedReader := bufio.NewReader(reader) 847 if !f.validateCacheHeader(bufferedReader) { 848 return errors.New("Cache header does not match") 849 } 850 f.verbosef("Database header matches, will attempt to use database %v\n", f.DbPath) 851 852 // read the file and spawn threads to process it 853 nodesToWalk := [][]*pathMap{} 854 mainTree := newPathMap("/") 855 856 // read the blocks and stream them into <blockChannel> 857 type dataBlock struct { 858 id int 859 err error 860 data []byte 861 } 862 blockChannel := make(chan dataBlock, f.numDbLoadingThreads) 863 readBlocks := func() { 864 index := 0 865 for { 866 // It takes some time to unmarshal the input from json, so we want 867 // to unmarshal it in parallel. In order to find valid places to 868 // break the input, we scan for the line separators that we inserted 869 // (for this purpose) when we dumped the database. 870 data, err := f.readLine(bufferedReader) 871 var response dataBlock 872 done := false 873 if err != nil && err != io.EOF { 874 response = dataBlock{id: index, err: err, data: nil} 875 done = true 876 } else { 877 done = (err == io.EOF) 878 response = dataBlock{id: index, err: nil, data: data} 879 } 880 blockChannel <- response 881 index++ 882 duration := time.Since(startTime) 883 f.verbosef("Read block %v after %v\n", index, duration) 884 if done { 885 f.verbosef("Read %v blocks in %v\n", index, duration) 886 close(blockChannel) 887 return 888 } 889 } 890 } 891 go readBlocks() 892 893 // Read from <blockChannel> and stream the responses into <resultChannel>. 894 type workResponse struct { 895 id int 896 err error 897 tree *pathMap 898 updatedDirs []string 899 } 900 resultChannel := make(chan workResponse) 901 processBlocks := func() { 902 numProcessed := 0 903 threadPool := newThreadPool(f.numDbLoadingThreads) 904 for { 905 // get a block to process 906 block, received := <-blockChannel 907 if !received { 908 break 909 } 910 911 if block.err != nil { 912 resultChannel <- workResponse{err: block.err} 913 break 914 } 915 numProcessed++ 916 // wait until there is CPU available to process it 917 threadPool.Run( 918 func() { 919 processStartTime := time.Now() 920 f.verbosef("Starting to process block %v after %v\n", 921 block.id, processStartTime.Sub(startTime)) 922 tempMap, updatedDirs, err := f.loadBytes(block.id, block.data) 923 var response workResponse 924 if err != nil { 925 f.verbosef( 926 "Block %v failed to parse with error %v\n", 927 block.id, err) 928 response = workResponse{err: err} 929 } else { 930 response = workResponse{ 931 id: block.id, 932 err: nil, 933 tree: tempMap, 934 updatedDirs: updatedDirs, 935 } 936 } 937 f.verbosef("Processed block %v in %v\n", 938 block.id, time.Since(processStartTime), 939 ) 940 resultChannel <- response 941 }, 942 ) 943 } 944 threadPool.Wait() 945 f.verbosef("Finished processing %v blocks in %v\n", 946 numProcessed, time.Since(startTime)) 947 close(resultChannel) 948 } 949 go processBlocks() 950 951 // Read from <resultChannel> and use the results 952 combineResults := func() (err error) { 953 for { 954 result, received := <-resultChannel 955 if !received { 956 break 957 } 958 if err != nil { 959 // In case of an error, wait for work to complete before 960 // returning the error. This ensures that any subsequent 961 // work doesn't need to compete for resources (and possibly 962 // fail due to, for example, a filesystem limit on the number of 963 // concurrently open files) with past work. 964 continue 965 } 966 if result.err != nil { 967 err = result.err 968 continue 969 } 970 // update main tree 971 mainTree.MergeIn(result.tree) 972 // record any new directories that we will need to Stat() 973 updatedNodes := make([]*pathMap, len(result.updatedDirs)) 974 for j, dir := range result.updatedDirs { 975 node := mainTree.GetNode(dir, false) 976 updatedNodes[j] = node 977 } 978 nodesToWalk = append(nodesToWalk, updatedNodes) 979 } 980 return err 981 } 982 err = combineResults() 983 if err != nil { 984 return err 985 } 986 987 f.nodes = *mainTree 988 989 // after having loaded the entire db and therefore created entries for 990 // the directories we know of, now it's safe to start calling ReadDir on 991 // any updated directories 992 for i := range nodesToWalk { 993 f.listDirsAsync(nodesToWalk[i]) 994 } 995 f.verbosef("Loaded db and statted known dirs in %v\n", time.Since(startTime)) 996 f.threadPool.Wait() 997 f.verbosef("Loaded db and statted all dirs in %v\n", time.Now().Sub(startTime)) 998 999 return err 1000} 1001 1002// startWithoutExternalCache starts scanning the filesystem according to the cache config 1003// startWithoutExternalCache should be called if startFromExternalCache is not applicable 1004func (f *Finder) startWithoutExternalCache() { 1005 startTime := time.Now() 1006 configDirs := f.cacheMetadata.Config.RootDirs 1007 1008 // clean paths 1009 candidates := make([]string, len(configDirs)) 1010 for i, dir := range configDirs { 1011 candidates[i] = filepath.Clean(dir) 1012 } 1013 // remove duplicates 1014 dirsToScan := make([]string, 0, len(configDirs)) 1015 for _, candidate := range candidates { 1016 include := true 1017 for _, included := range dirsToScan { 1018 if included == "/" || strings.HasPrefix(candidate+"/", included+"/") { 1019 include = false 1020 break 1021 } 1022 } 1023 if include { 1024 dirsToScan = append(dirsToScan, candidate) 1025 } 1026 } 1027 1028 // start searching finally 1029 for _, path := range dirsToScan { 1030 f.verbosef("Starting find of %v\n", path) 1031 f.startFind(path) 1032 } 1033 1034 f.threadPool.Wait() 1035 1036 f.verbosef("Scanned filesystem (not using cache) in %v\n", time.Now().Sub(startTime)) 1037} 1038 1039// isInfoUpToDate tells whether <new> can confirm that results computed at <old> are still valid 1040func (f *Finder) isInfoUpToDate(old statResponse, new statResponse) (equal bool) { 1041 if old.Inode != new.Inode { 1042 return false 1043 } 1044 if old.ModTime != new.ModTime { 1045 return false 1046 } 1047 if old.Device != new.Device { 1048 return false 1049 } 1050 return true 1051} 1052 1053func (f *Finder) wasModified() bool { 1054 return atomic.LoadInt32(&f.modifiedFlag) > 0 1055} 1056 1057func (f *Finder) setModified() { 1058 var newVal int32 1059 newVal = 1 1060 atomic.StoreInt32(&f.modifiedFlag, newVal) 1061} 1062 1063// sortedDirEntries exports directory entries to facilitate dumping them to the external cache 1064func (f *Finder) sortedDirEntries() []dirFullInfo { 1065 startTime := time.Now() 1066 nodes := make([]dirFullInfo, 0) 1067 for _, node := range f.nodes.DumpAll() { 1068 if node.ModTime != 0 { 1069 nodes = append(nodes, node) 1070 } 1071 } 1072 discoveryDate := time.Now() 1073 f.verbosef("Generated %v cache entries in %v\n", len(nodes), discoveryDate.Sub(startTime)) 1074 less := func(i int, j int) bool { 1075 return nodes[i].Path < nodes[j].Path 1076 } 1077 sort.Slice(nodes, less) 1078 sortDate := time.Now() 1079 f.verbosef("Sorted %v cache entries in %v\n", len(nodes), sortDate.Sub(discoveryDate)) 1080 1081 return nodes 1082} 1083 1084// serializeDb converts the cache database into a form to save to disk 1085func (f *Finder) serializeDb() ([]byte, error) { 1086 // sort dir entries 1087 var entryList = f.sortedDirEntries() 1088 1089 // Generate an output file that can be conveniently loaded using the same number of threads 1090 // as were used in this execution (because presumably that will be the number of threads 1091 // used in the next execution too) 1092 1093 // generate header 1094 header := []byte{} 1095 header = append(header, []byte(f.cacheMetadata.Version)...) 1096 header = append(header, lineSeparator) 1097 configDump, err := f.cacheMetadata.Config.Dump() 1098 if err != nil { 1099 return nil, err 1100 } 1101 header = append(header, configDump...) 1102 1103 // serialize individual blocks in parallel 1104 numBlocks := f.numDbLoadingThreads 1105 if numBlocks > len(entryList) { 1106 numBlocks = len(entryList) 1107 } 1108 blocks := make([][]byte, 1+numBlocks) 1109 blocks[0] = header 1110 blockMin := 0 1111 wg := sync.WaitGroup{} 1112 var errLock sync.Mutex 1113 1114 for i := 1; i <= numBlocks; i++ { 1115 // identify next block 1116 blockMax := len(entryList) * i / numBlocks 1117 block := entryList[blockMin:blockMax] 1118 1119 // process block 1120 wg.Add(1) 1121 go func(index int, block []dirFullInfo) { 1122 byteBlock, subErr := f.serializeCacheEntry(block) 1123 f.verbosef("Serialized block %v into %v bytes\n", index, len(byteBlock)) 1124 if subErr != nil { 1125 f.verbosef("%v\n", subErr.Error()) 1126 errLock.Lock() 1127 err = subErr 1128 errLock.Unlock() 1129 } else { 1130 blocks[index] = byteBlock 1131 } 1132 wg.Done() 1133 }(i, block) 1134 1135 blockMin = blockMax 1136 } 1137 1138 wg.Wait() 1139 1140 if err != nil { 1141 return nil, err 1142 } 1143 1144 content := bytes.Join(blocks, []byte{lineSeparator}) 1145 1146 return content, nil 1147} 1148 1149// dumpDb saves the cache database to disk 1150func (f *Finder) dumpDb() error { 1151 startTime := time.Now() 1152 f.verbosef("Dumping db\n") 1153 1154 tempPath := f.DbPath + ".tmp" 1155 1156 bytes, err := f.serializeDb() 1157 if err != nil { 1158 return err 1159 } 1160 serializeDate := time.Now() 1161 f.verbosef("Serialized db in %v\n", serializeDate.Sub(startTime)) 1162 // dump file and atomically move 1163 err = f.filesystem.WriteFile(tempPath, bytes, 0777) 1164 if err != nil { 1165 return err 1166 } 1167 err = f.filesystem.Rename(tempPath, f.DbPath) 1168 if err != nil { 1169 return err 1170 } 1171 1172 f.verbosef("Wrote db in %v\n", time.Now().Sub(serializeDate)) 1173 return nil 1174 1175} 1176 1177// canIgnoreFsErr checks for certain classes of filesystem errors that are safe to ignore 1178func (f *Finder) canIgnoreFsErr(err error) bool { 1179 pathErr, isPathErr := err.(*os.PathError) 1180 if !isPathErr { 1181 // Don't recognize this error 1182 return false 1183 } 1184 if os.IsPermission(pathErr) { 1185 // Permission errors are ignored: 1186 // https://issuetracker.google.com/37553659 1187 // https://github.com/google/kati/pull/116 1188 return true 1189 } 1190 if pathErr.Err == os.ErrNotExist { 1191 // If a directory doesn't exist, that generally means the cache is out-of-date 1192 return true 1193 } 1194 // Don't recognize this error 1195 return false 1196} 1197 1198// onFsError should be called whenever a potentially fatal error is returned from a filesystem call 1199func (f *Finder) onFsError(path string, err error) { 1200 if !f.canIgnoreFsErr(err) { 1201 // We could send the errors through a channel instead, although that would cause this call 1202 // to block unless we preallocated a sufficient buffer or spawned a reader thread. 1203 // Although it wouldn't be too complicated to spawn a reader thread, it's still slightly 1204 // more convenient to use a lock. Only in an unusual situation should this code be 1205 // invoked anyway. 1206 f.errlock.Lock() 1207 f.fsErrs = append(f.fsErrs, fsErr{path: path, err: err}) 1208 f.errlock.Unlock() 1209 } 1210} 1211 1212// discardErrsForPrunedPaths removes any errors for paths that are no longer included in the cache 1213func (f *Finder) discardErrsForPrunedPaths() { 1214 // This function could be somewhat inefficient due to being single-threaded, 1215 // but the length of f.fsErrs should be approximately 0, so it shouldn't take long anyway. 1216 relevantErrs := make([]fsErr, 0, len(f.fsErrs)) 1217 for _, fsErr := range f.fsErrs { 1218 path := fsErr.path 1219 node := f.nodes.GetNode(path, false) 1220 if node != nil { 1221 // The path in question wasn't pruned due to a failure to process a parent directory. 1222 // So, the failure to process this path is important 1223 relevantErrs = append(relevantErrs, fsErr) 1224 } 1225 } 1226 f.fsErrs = relevantErrs 1227} 1228 1229// getErr returns an error based on previous calls to onFsErr, if any 1230func (f *Finder) getErr() error { 1231 f.discardErrsForPrunedPaths() 1232 1233 numErrs := len(f.fsErrs) 1234 if numErrs < 1 { 1235 return nil 1236 } 1237 1238 maxNumErrsToInclude := 10 1239 message := "" 1240 if numErrs > maxNumErrsToInclude { 1241 message = fmt.Sprintf("finder encountered %v errors: %v...", numErrs, f.fsErrs[:maxNumErrsToInclude]) 1242 } else { 1243 message = fmt.Sprintf("finder encountered %v errors: %v", numErrs, f.fsErrs) 1244 } 1245 1246 return errors.New(message) 1247} 1248 1249func (f *Finder) statDirAsync(dir *pathMap) { 1250 node := dir 1251 path := dir.path 1252 f.threadPool.Run( 1253 func() { 1254 updatedStats := f.statDirSync(path) 1255 1256 if !f.isInfoUpToDate(node.statResponse, updatedStats) { 1257 node.mapNode = mapNode{ 1258 statResponse: updatedStats, 1259 FileNames: []string{}, 1260 } 1261 f.setModified() 1262 if node.statResponse.ModTime != 0 { 1263 // modification time was updated, so re-scan for 1264 // child directories 1265 f.listDirAsync(dir) 1266 } 1267 } 1268 }, 1269 ) 1270} 1271 1272func (f *Finder) statDirSync(path string) statResponse { 1273 1274 fileInfo, err := f.filesystem.Lstat(path) 1275 1276 var stats statResponse 1277 if err != nil { 1278 // possibly record this error 1279 f.onFsError(path, err) 1280 // in case of a failure to stat the directory, treat the directory as missing (modTime = 0) 1281 return stats 1282 } 1283 modTime := fileInfo.ModTime() 1284 stats = statResponse{} 1285 inode, err := f.filesystem.InodeNumber(fileInfo) 1286 if err != nil { 1287 panic(fmt.Sprintf("Could not get inode number of %v: %v\n", path, err.Error())) 1288 } 1289 stats.Inode = inode 1290 device, err := f.filesystem.DeviceNumber(fileInfo) 1291 if err != nil { 1292 panic(fmt.Sprintf("Could not get device number of %v: %v\n", path, err.Error())) 1293 } 1294 stats.Device = device 1295 permissionsChangeTime, err := f.filesystem.PermTime(fileInfo) 1296 1297 if err != nil { 1298 panic(fmt.Sprintf("Could not get permissions modification time (CTime) of %v: %v\n", path, err.Error())) 1299 } 1300 // We're only interested in knowing whether anything about the directory 1301 // has changed since last check, so we use the latest of the two 1302 // modification times (content modification (mtime) and 1303 // permission modification (ctime)) 1304 if permissionsChangeTime.After(modTime) { 1305 modTime = permissionsChangeTime 1306 } 1307 stats.ModTime = modTime.UnixNano() 1308 1309 return stats 1310} 1311 1312// pruneCacheCandidates removes the items that we don't want to include in our persistent cache 1313func (f *Finder) pruneCacheCandidates(items *DirEntries) { 1314 1315 for _, fileName := range items.FileNames { 1316 for _, abortedName := range f.cacheMetadata.Config.PruneFiles { 1317 if fileName == abortedName { 1318 items.FileNames = []string{} 1319 items.DirNames = []string{} 1320 return 1321 } 1322 } 1323 } 1324 1325 // remove any files that aren't the ones we want to include 1326 writeIndex := 0 1327 for _, fileName := range items.FileNames { 1328 // include only these files 1329 for _, includedName := range f.cacheMetadata.Config.IncludeFiles { 1330 if fileName == includedName { 1331 items.FileNames[writeIndex] = fileName 1332 writeIndex++ 1333 break 1334 } 1335 } 1336 } 1337 // resize 1338 items.FileNames = items.FileNames[:writeIndex] 1339 1340 writeIndex = 0 1341 for _, dirName := range items.DirNames { 1342 items.DirNames[writeIndex] = dirName 1343 // ignore other dirs that are known to not be inputs to the build process 1344 include := true 1345 for _, excludedName := range f.cacheMetadata.Config.ExcludeDirs { 1346 if dirName == excludedName { 1347 // don't include 1348 include = false 1349 break 1350 } 1351 } 1352 if include { 1353 writeIndex++ 1354 } 1355 } 1356 // resize 1357 items.DirNames = items.DirNames[:writeIndex] 1358} 1359 1360func (f *Finder) listDirsAsync(nodes []*pathMap) { 1361 f.threadPool.Run( 1362 func() { 1363 for i := range nodes { 1364 f.listDirSync(nodes[i]) 1365 } 1366 }, 1367 ) 1368} 1369 1370func (f *Finder) listDirAsync(node *pathMap) { 1371 f.threadPool.Run( 1372 func() { 1373 f.listDirSync(node) 1374 }, 1375 ) 1376} 1377 1378func (f *Finder) listDirSync(dir *pathMap) { 1379 path := dir.path 1380 children, err := f.filesystem.ReadDir(path) 1381 1382 if err != nil { 1383 // possibly record this error 1384 f.onFsError(path, err) 1385 // if listing the contents of the directory fails (presumably due to 1386 // permission denied), then treat the directory as empty 1387 children = nil 1388 } 1389 1390 var subdirs []string 1391 var subfiles []string 1392 1393 for _, child := range children { 1394 linkBits := child.Mode() & os.ModeSymlink 1395 isLink := linkBits != 0 1396 if child.IsDir() { 1397 if !isLink { 1398 // Skip symlink dirs. 1399 // We don't have to support symlink dirs because 1400 // that would cause duplicates. 1401 subdirs = append(subdirs, child.Name()) 1402 } 1403 } else { 1404 // We do have to support symlink files because the link name might be 1405 // different than the target name 1406 // (for example, Android.bp -> build/soong/root.bp) 1407 subfiles = append(subfiles, child.Name()) 1408 } 1409 1410 } 1411 parentNode := dir 1412 1413 entry := &DirEntries{Path: path, DirNames: subdirs, FileNames: subfiles} 1414 f.pruneCacheCandidates(entry) 1415 1416 // create a pathMap node for each relevant subdirectory 1417 relevantChildren := map[string]*pathMap{} 1418 for _, subdirName := range entry.DirNames { 1419 childNode, found := parentNode.children[subdirName] 1420 // if we already knew of this directory, then we already have a request pending to Stat it 1421 // if we didn't already know of this directory, then we must Stat it now 1422 if !found { 1423 childNode = parentNode.newChild(subdirName) 1424 f.statDirAsync(childNode) 1425 } 1426 relevantChildren[subdirName] = childNode 1427 } 1428 // Note that in rare cases, it's possible that we're reducing the set of 1429 // children via this statement, if these are all true: 1430 // 1. we previously had a cache that knew about subdirectories of parentNode 1431 // 2. the user created a prune-file (described in pruneCacheCandidates) 1432 // inside <parentNode>, which specifies that the contents of parentNode 1433 // are to be ignored. 1434 // The fact that it's possible to remove children here means that *pathMap structs 1435 // must not be looked up from f.nodes by filepath (and instead must be accessed by 1436 // direct pointer) until after every listDirSync completes 1437 parentNode.FileNames = entry.FileNames 1438 parentNode.children = relevantChildren 1439 1440} 1441 1442// listMatches takes a node and a function that specifies which subdirectories and 1443// files to include, and listMatches returns the matches 1444func (f *Finder) listMatches(node *pathMap, 1445 filter WalkFunc) (subDirs []*pathMap, filePaths []string) { 1446 entries := DirEntries{ 1447 FileNames: node.FileNames, 1448 } 1449 entries.DirNames = make([]string, 0, len(node.children)) 1450 for childName := range node.children { 1451 entries.DirNames = append(entries.DirNames, childName) 1452 } 1453 1454 dirNames, fileNames := filter(entries) 1455 1456 subDirs = []*pathMap{} 1457 filePaths = make([]string, 0, len(fileNames)) 1458 for _, fileName := range fileNames { 1459 filePaths = append(filePaths, joinCleanPaths(node.path, fileName)) 1460 } 1461 subDirs = make([]*pathMap, 0, len(dirNames)) 1462 for _, childName := range dirNames { 1463 child, ok := node.children[childName] 1464 if ok { 1465 subDirs = append(subDirs, child) 1466 } 1467 } 1468 1469 return subDirs, filePaths 1470} 1471 1472// findInCacheMultithreaded spawns potentially multiple goroutines with which to search the cache. 1473func (f *Finder) findInCacheMultithreaded(node *pathMap, filter WalkFunc, 1474 approxNumThreads int) []string { 1475 1476 if approxNumThreads < 2 { 1477 // Done spawning threads; process remaining directories 1478 return f.findInCacheSinglethreaded(node, filter) 1479 } 1480 1481 totalWork := 0 1482 for _, child := range node.children { 1483 totalWork += child.approximateNumDescendents 1484 } 1485 childrenResults := make(chan []string, len(node.children)) 1486 1487 subDirs, filePaths := f.listMatches(node, filter) 1488 1489 // process child directories 1490 for _, child := range subDirs { 1491 numChildThreads := approxNumThreads * child.approximateNumDescendents / totalWork 1492 childProcessor := func(child *pathMap) { 1493 childResults := f.findInCacheMultithreaded(child, filter, numChildThreads) 1494 childrenResults <- childResults 1495 } 1496 // If we're allowed to use more than 1 thread to process this directory, 1497 // then instead we use 1 thread for each subdirectory. 1498 // It would be strange to spawn threads for only some subdirectories. 1499 go childProcessor(child) 1500 } 1501 1502 // collect results 1503 for i := 0; i < len(subDirs); i++ { 1504 childResults := <-childrenResults 1505 filePaths = append(filePaths, childResults...) 1506 } 1507 close(childrenResults) 1508 1509 return filePaths 1510} 1511 1512// findInCacheSinglethreaded synchronously searches the cache for all matching file paths 1513// note findInCacheSinglethreaded runs 2X to 4X as fast by being iterative rather than recursive 1514func (f *Finder) findInCacheSinglethreaded(node *pathMap, filter WalkFunc) []string { 1515 if node == nil { 1516 return []string{} 1517 } 1518 1519 nodes := []*pathMap{node} 1520 matches := []string{} 1521 1522 for len(nodes) > 0 { 1523 currentNode := nodes[0] 1524 nodes = nodes[1:] 1525 1526 subDirs, filePaths := f.listMatches(currentNode, filter) 1527 1528 nodes = append(nodes, subDirs...) 1529 1530 matches = append(matches, filePaths...) 1531 } 1532 return matches 1533} 1534