// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package finder

import (
	"bufio"
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"android/soong/finder/fs"
)
// This file provides a Finder struct that can quickly search for files satisfying
// certain criteria.
// This Finder gets its speed partially from parallelism and partially from caching.
// If a Stat call returns the same result as last time, then it means Finder
// can skip the ReadDir call for that dir.

// The primary data structure used by the finder is the field Finder.nodes,
// which is a tree of nodes of type *pathMap.
// Each node represents a directory on disk, along with its stats, subdirectories,
// and contained files.

// The common use case for the Finder is that the caller creates a Finder and gives
// it the same query that was used in the previous execution.
// In this situation, the major events that take place are:
// 1. The Finder begins to load its db
// 2. The Finder begins to stat the directories mentioned in its db (using multiple threads)
//    Calling Stat on each of these directories is generally a large fraction of the total time
// 3. The Finder begins to construct a separate tree of nodes in each of its threads
// 4. The Finder merges the individual node trees into the main node tree
// 5. The Finder may call ReadDir a few times if there are a few directories that are out-of-date
//    These ReadDir calls might prompt additional Stat calls, etc
// 6. The Finder waits for all loading to complete
// 7. The Finder searches the cache for files matching the user's query (using multiple threads)

// These are the invariants regarding concurrency:
// 1. The public methods of Finder are threadsafe.
//      The public methods are only performance-optimized for one caller at a time, however.
//      For the moment, multiple concurrent callers shouldn't expect any better performance than
//      multiple serial callers.
// 2. While building the node tree, only one thread may ever access the <children> collection of a
//    *pathMap at once.
//    a) The thread that accesses the <children> collection is the thread that discovers the
//       children (by reading them from the cache or by having received a response to ReadDir).
//       1) Consequently, the thread that discovers the children also spawns requests to stat
//          subdirs.
//    b) Consequently, while building the node tree, no thread may do a lookup of its
//       *pathMap via filepath because another thread may be adding children to the
//       <children> collection of an ancestor node. Additionally, in rare cases, another thread
//       may be removing children from an ancestor node if the children were only discovered to
//       be irrelevant after calling ReadDir (which happens if a prune-file was just added).
// 3. No query will begin to be serviced until all loading (both reading the db
//    and scanning the filesystem) is complete.
//    Tests indicate that it only takes about 10% as long to search the in-memory cache as to
//    generate it, making this not a huge loss in performance.
// 4. The parsing of the db and the initial setup of the pathMap tree must complete before
//    beginning to call listDirSync (because listDirSync can create new entries in the pathMap)

// See cmd/finder.go or finder_test.go for usage examples.

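// As an illustration, typical usage looks roughly like the sketch below.
// (This is a hedged example modeled on the test code, not a verbatim excerpt;
// in particular, fs.OsFs as the FileSystem and the log-based Logger are
// assumptions.)
//
//	params := CacheParams{
//		WorkingDirectory: "/tmp",
//		RootDirs:         []string{"/tmp"},
//		ExcludeDirs:      []string{".git"},
//		PruneFiles:       []string{".out-dir"},
//		IncludeFiles:     []string{"Android.bp"},
//	}
//	finder, err := New(params, fs.OsFs, log.New(os.Stderr, "", 0), "/tmp/finder-db")
//	if err != nil {
//		// handle the error
//	}
//	matches := finder.FindNamed("Android.bp") // searches the in-memory cache
//	finder.Shutdown()                         // waits for the db dump to complete
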
// Update versionString whenever making a backwards-incompatible change to the cache file format
const versionString = "Android finder version 1"

// a CacheParams specifies which files and directories the user wishes to have scanned and
// potentially added to the cache
type CacheParams struct {
	// WorkingDirectory is used as a base for any relative file paths given to the Finder
	WorkingDirectory string

	// RootDirs are the root directories used to initiate the search
	RootDirs []string

	// ExcludeDirs are directory names that if encountered are removed from the search
	ExcludeDirs []string

	// PruneFiles are file names that if encountered prune their entire directory
	// (including siblings)
	PruneFiles []string

	// IncludeFiles are file names to include as matches
	IncludeFiles []string
}
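
// To illustrate the fields above with a hypothetical tree: given
// ExcludeDirs = [".git"], PruneFiles = [".out-dir"], IncludeFiles = ["Android.bp"],
//
//	/src/.git/...           -> not searched (directory name is excluded)
//	/src/out/.out-dir       -> all files and subdirs of /src/out are pruned
//	/src/app/Android.bp     -> recorded as a match
//	/src/app/README.md      -> seen but not recorded (not in IncludeFiles)
//
// so only /src/app/Android.bp would be returned by a subsequent FindAll.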

// a cacheConfig stores the inputs that determine what should be included in the cache
type cacheConfig struct {
	CacheParams

	// FilesystemView is a unique identifier telling which parts of which file systems
	// are readable by the Finder. In practice its value is essentially username@hostname.
	// FilesystemView is set to ensure that a cache file copied to another host or
	// found by another user doesn't inadvertently get reused.
	FilesystemView string
}

func (p *cacheConfig) Dump() ([]byte, error) {
	return json.Marshal(p)
}

// a cacheMetadata stores version information about the cache
type cacheMetadata struct {
	// The Version enables the Finder to determine whether it can even parse the file
	// If the version changes, the entire cache file must be regenerated
	Version string

	// The CacheParams enables the Finder to determine whether the parameters match
	// If the CacheParams change, the Finder can choose how much of the cache file to reuse
	// (although in practice, the Finder will probably choose to ignore the entire file anyway)
	Config cacheConfig
}

type Logger interface {
	Output(calldepth int, s string) error
}

// the Finder is the main struct that callers will want to use
type Finder struct {
	// configuration
	DbPath              string
	numDbLoadingThreads int
	numSearchingThreads int
	cacheMetadata       cacheMetadata
	logger              Logger
	filesystem          fs.FileSystem

	// temporary state
	threadPool        *threadPool
	mutex             sync.Mutex
	fsErrs            []fsErr
	errlock           sync.Mutex
	shutdownWaitgroup sync.WaitGroup

	// non-temporary state
	modifiedFlag int32
	nodes        pathMap
}

var defaultNumThreads = runtime.NumCPU() * 2

// New creates a new Finder for use
func New(cacheParams CacheParams, filesystem fs.FileSystem,
	logger Logger, dbPath string) (f *Finder, err error) {
	return newImpl(cacheParams, filesystem, logger, dbPath, defaultNumThreads)
}

// newImpl is like New but accepts more params
func newImpl(cacheParams CacheParams, filesystem fs.FileSystem,
	logger Logger, dbPath string, numThreads int) (f *Finder, err error) {
	numDbLoadingThreads := numThreads
	numSearchingThreads := numThreads

	metadata := cacheMetadata{
		Version: versionString,
		Config: cacheConfig{
			CacheParams:    cacheParams,
			FilesystemView: filesystem.ViewId(),
		},
	}

	f = &Finder{
		numDbLoadingThreads: numDbLoadingThreads,
		numSearchingThreads: numSearchingThreads,
		cacheMetadata:       metadata,
		logger:              logger,
		filesystem:          filesystem,

		nodes:  *newPathMap("/"),
		DbPath: dbPath,

		shutdownWaitgroup: sync.WaitGroup{},
	}

	f.loadFromFilesystem()

	// check for any filesystem errors
	err = f.getErr()
	if err != nil {
		return nil, err
	}

	// confirm that every path mentioned in the cache config exists
	for _, path := range cacheParams.RootDirs {
		if !filepath.IsAbs(path) {
			path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
		}
		node := f.nodes.GetNode(filepath.Clean(path), false)
		if node == nil || node.ModTime == 0 {
			return nil, fmt.Errorf("path %v was specified to be included in the cache but does not exist", path)
		}
	}

	return f, nil
}

// FindAll searches for every cached file
func (f *Finder) FindAll() []string {
	return f.FindAt("/")
}

// FindAt searches for every cached file under <rootDir>
func (f *Finder) FindAt(rootDir string) []string {
	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
		return entries.DirNames, entries.FileNames
	}
	return f.FindMatching(rootDir, filter)
}

// FindNamed searches for every cached file named <fileName>
func (f *Finder) FindNamed(fileName string) []string {
	return f.FindNamedAt("/", fileName)
}

// FindNamedAt searches under <rootPath> for every file named <fileName>.
// The reason a caller might use FindNamedAt instead of FindNamed is if they want
// to limit their search to a subset of the cache.
func (f *Finder) FindNamedAt(rootPath string, fileName string) []string {
	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
		matches := []string{}
		for _, foundName := range entries.FileNames {
			if foundName == fileName {
				matches = append(matches, foundName)
			}
		}
		return entries.DirNames, matches
	}
	return f.FindMatching(rootPath, filter)
}

// FindFirstNamed searches for every file named <fileName>.
// Whenever it finds a match, it does not descend into the subdirectories of the
// directory containing that match.
func (f *Finder) FindFirstNamed(fileName string) []string {
	return f.FindFirstNamedAt("/", fileName)
}

// FindFirstNamedAt searches under <rootPath> for every file named <fileName>.
// Whenever it finds a match, it does not descend into the subdirectories of the
// directory containing that match.
func (f *Finder) FindFirstNamedAt(rootPath string, fileName string) []string {
	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
		matches := []string{}
		for _, foundName := range entries.FileNames {
			if foundName == fileName {
				matches = append(matches, foundName)
			}
		}

		if len(matches) > 0 {
			return []string{}, matches
		}
		return entries.DirNames, matches
	}
	return f.FindMatching(rootPath, filter)
}

// FindMatching is the most general exported function for searching for files in the cache.
// The WalkFunc will be invoked repeatedly and is expected to return the subset of the
// given directory names to descend into and the file names to report as matches.
// WalkFunc will be invoked potentially many times in parallel, and must be threadsafe.
func (f *Finder) FindMatching(rootPath string, filter WalkFunc) []string {
	// set up some parameters
	scanStart := time.Now()
	var isRel bool
	workingDir := f.cacheMetadata.Config.WorkingDirectory

	isRel = !filepath.IsAbs(rootPath)
	if isRel {
		rootPath = filepath.Join(workingDir, rootPath)
	}

	rootPath = filepath.Clean(rootPath)

	// ensure nothing else is using the Finder
	f.verbosef("FindMatching waiting for finder to be idle\n")
	f.lock()
	defer f.unlock()

	node := f.nodes.GetNode(rootPath, false)
	if node == nil {
		f.verbosef("No data for path %v; apparently not included in cache params: %v\n",
			rootPath, f.cacheMetadata.Config.CacheParams)
		// path is not found; don't do a search
		return []string{}
	}

	// search for matching files
	f.verbosef("Finder finding %v using cache\n", rootPath)
	results := f.findInCacheMultithreaded(node, filter, f.numSearchingThreads)

	// format and return results
	if isRel {
		for i := 0; i < len(results); i++ {
			results[i] = strings.Replace(results[i], workingDir+"/", "", 1)
		}
	}
	sort.Strings(results)
	f.verbosef("Found %v files under %v in %v using cache\n",
		len(results), rootPath, time.Since(scanStart))
	return results
}

// Shutdown declares that the finder is no longer needed and waits for its cleanup to complete.
// Currently, that only entails waiting for the database dump to complete.
func (f *Finder) Shutdown() {
	f.waitForDbDump()
}

// End of public api

func (f *Finder) goDumpDb() {
	if f.wasModified() {
		f.shutdownWaitgroup.Add(1)
		go func() {
			err := f.dumpDb()
			if err != nil {
				f.verbosef("%v\n", err)
			}
			f.shutdownWaitgroup.Done()
		}()
	} else {
		f.verbosef("Skipping dumping unmodified db\n")
	}
}

func (f *Finder) waitForDbDump() {
	f.shutdownWaitgroup.Wait()
}

// joinCleanPaths is like filepath.Join but is faster because
// joinCleanPaths doesn't have to support paths ending in "/" or containing "..".
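// For example (assuming those preconditions hold):
//
//	joinCleanPaths("", "leaf")  == "leaf"
//	joinCleanPaths("/", "leaf") == "/leaf"
//	joinCleanPaths("/a", "b")   == "/a/b"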
func joinCleanPaths(base string, leaf string) string {
	if base == "" {
		return leaf
	}
	if base == "/" {
		return base + leaf
	}
	if leaf == "" {
		return base
	}
	return base + "/" + leaf
}

func (f *Finder) verbosef(format string, args ...interface{}) {
	f.logger.Output(2, fmt.Sprintf(format, args...))
}

// loadFromFilesystem populates the in-memory cache based on the contents of the filesystem
func (f *Finder) loadFromFilesystem() {
	f.threadPool = newThreadPool(f.numDbLoadingThreads)

	err := f.startFromExternalCache()
	if err != nil {
		f.startWithoutExternalCache()
	}

	f.goDumpDb()

	f.threadPool = nil
}

func (f *Finder) startFind(path string) {
	if !filepath.IsAbs(path) {
		path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
	}
	node := f.nodes.GetNode(path, true)
	f.statDirAsync(node)
}

func (f *Finder) lock() {
	f.mutex.Lock()
}

func (f *Finder) unlock() {
	f.mutex.Unlock()
}

// a statResponse is the relevant portion of the response from the filesystem to a Stat call
type statResponse struct {
	ModTime int64
	Inode   uint64
	Device  uint64
}

// a pathAndStats stores a path and its stats
type pathAndStats struct {
	statResponse

	Path string
}

// a dirFullInfo stores all of the relevant information we know about a directory
type dirFullInfo struct {
	pathAndStats

	FileNames []string
}

// a PersistedDirInfo is the information about a dir that we save to our cache on disk
type PersistedDirInfo struct {
	// These field names are short because they are repeated many times in the output json file
	P string   // path
	T int64    // modification time
	I uint64   // inode number
	F []string // relevant filenames contained
}

// a PersistedDirs is the information that we persist for a group of dirs
type PersistedDirs struct {
	// the device on which each directory is stored
	Device uint64
	// the common root path to which all contained dirs are relative
	Root string
	// the directories themselves
	Dirs []PersistedDirInfo
}

// a CacheEntry is the smallest unit that can be read and parsed from the cache (on disk) at a time
type CacheEntry []PersistedDirs

// a DirEntries lists the files and directories contained directly within a specific directory
type DirEntries struct {
	Path string

	// elements of DirNames are just the dir names; they don't include any '/' character
	DirNames []string
	// elements of FileNames are just the file names; they don't include any '/' character
	FileNames []string
}

// a WalkFunc is the type that is passed into various Find functions for determining which
// directories the caller wishes to have walked. The WalkFunc is expected to decide which
// directories to walk and which files to consider as matches to the original query.
type WalkFunc func(DirEntries) (dirs []string, files []string)
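
// As an illustrative sketch (hypothetical, not part of the original API), a
// WalkFunc that matches every file ending in ".bp" while descending into all
// subdirectories could be written as:
//
//	var bpFilter WalkFunc = func(entries DirEntries) (dirs []string, files []string) {
//		for _, name := range entries.FileNames {
//			if strings.HasSuffix(name, ".bp") {
//				files = append(files, name)
//			}
//		}
//		return entries.DirNames, files
//	}
//
// and passed to FindMatching. Remember that a WalkFunc must be threadsafe.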

// a mapNode stores the relevant stats about a directory to be stored in a pathMap
type mapNode struct {
	statResponse
	FileNames []string
}

// a pathMap implements the directory tree structure of nodes
type pathMap struct {
	mapNode

	path string

	children map[string]*pathMap

	// number of descendent nodes, including self
	approximateNumDescendents int
}

func newPathMap(path string) *pathMap {
	result := &pathMap{path: path, children: make(map[string]*pathMap, 4),
		approximateNumDescendents: 1}
	return result
}

// GetNode returns the node at <path>, creating it if createIfNotFound is true;
// otherwise it returns nil for paths not present in the tree.
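// For example, GetNode("/a/b", false) walks m.children["a"].children["b"] and
// returns that node, or nil if either level is missing.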
func (m *pathMap) GetNode(path string, createIfNotFound bool) *pathMap {
	if len(path) > 0 && path[0] == '/' {
		path = path[1:]
	}

	node := m
	for {
		if path == "" {
			return node
		}

		index := strings.Index(path, "/")
		var firstComponent string
		if index >= 0 {
			firstComponent = path[:index]
			path = path[index+1:]
		} else {
			firstComponent = path
			path = ""
		}

		child, found := node.children[firstComponent]

		if !found {
			if createIfNotFound {
				child = node.newChild(firstComponent)
			} else {
				return nil
			}
		}

		node = child
	}
}

func (m *pathMap) newChild(name string) (child *pathMap) {
	path := joinCleanPaths(m.path, name)
	newChild := newPathMap(path)
	m.children[name] = newChild

	return newChild
}

func (m *pathMap) UpdateNumDescendents() int {
	count := 1
	for _, child := range m.children {
		count += child.approximateNumDescendents
	}
	m.approximateNumDescendents = count
	return count
}

func (m *pathMap) UpdateNumDescendentsRecursive() {
	for _, child := range m.children {
		child.UpdateNumDescendentsRecursive()
	}
	m.UpdateNumDescendents()
}

func (m *pathMap) MergeIn(other *pathMap) {
	for key, theirs := range other.children {
		ours, found := m.children[key]
		if found {
			ours.MergeIn(theirs)
		} else {
			m.children[key] = theirs
		}
	}
	if other.ModTime != 0 {
		m.mapNode = other.mapNode
	}
	m.UpdateNumDescendents()
}

func (m *pathMap) DumpAll() []dirFullInfo {
	results := []dirFullInfo{}
	m.dumpInto("", &results)
	return results
}

func (m *pathMap) dumpInto(path string, results *[]dirFullInfo) {
	*results = append(*results,
		dirFullInfo{
			pathAndStats{statResponse: m.statResponse, Path: path},
			m.FileNames},
	)
	for key, child := range m.children {
		childPath := joinCleanPaths(path, key)
		if len(childPath) == 0 || childPath[0] != '/' {
			childPath = "/" + childPath
		}
		child.dumpInto(childPath, results)
	}
}

// a semaphore can be locked by up to <capacity> callers at once
type semaphore struct {
	pool chan bool
}

func newSemaphore(capacity int) *semaphore {
	return &semaphore{pool: make(chan bool, capacity)}
}

func (l *semaphore) Lock() {
	l.pool <- true
}

func (l *semaphore) Unlock() {
	<-l.pool
}

// A threadPool runs goroutines and supports throttling and waiting.
// Without throttling, Go may exhaust the maximum number of various resources, such as
// threads or file descriptors, and crash the program.
type threadPool struct {
	receivedRequests sync.WaitGroup
	activeRequests   semaphore
}

func newThreadPool(maxNumConcurrentThreads int) *threadPool {
	return &threadPool{
		receivedRequests: sync.WaitGroup{},
		activeRequests:   *newSemaphore(maxNumConcurrentThreads),
	}
}

// Run requests to run the given function in its own goroutine
func (p *threadPool) Run(function func()) {
	p.receivedRequests.Add(1)
	// If Run() was called from within a goroutine spawned by this threadPool,
	// then we may need to return from Run() before having capacity to actually
	// run <function>.
	//
	// It's possible that the body of <function> contains a statement (such as a syscall)
	// that will cause Go to pin it to a thread, or will contain a statement that uses
	// another resource that is in short supply (such as a file descriptor), so we can't
	// actually run <function> until we have capacity.
	//
	// However, the semaphore used for synchronization is implemented via a channel and
	// shouldn't require a new thread for each access.
	go func() {
		p.activeRequests.Lock()
		function()
		p.activeRequests.Unlock()
		p.receivedRequests.Done()
	}()
}

// Wait waits until all goroutines are done, just like sync.WaitGroup's Wait
func (p *threadPool) Wait() {
	p.receivedRequests.Wait()
}
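
// A minimal usage sketch of threadPool (hypothetical names, for illustration):
//
//	pool := newThreadPool(4) // at most 4 functions run concurrently
//	for _, task := range tasks {
//		task := task // capture the loop variable
//		pool.Run(func() { task() })
//	}
//	pool.Wait() // blocks until every submitted function has returned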

type fsErr struct {
	path string
	err  error
}

func (e fsErr) String() string {
	return e.path + ": " + e.err.Error()
}

func (f *Finder) serializeCacheEntry(dirInfos []dirFullInfo) ([]byte, error) {
	// group each dirFullInfo by its Device, to avoid having to repeat it in the output
	dirsByDevice := map[uint64][]PersistedDirInfo{}
	for _, entry := range dirInfos {
		_, found := dirsByDevice[entry.Device]
		if !found {
			dirsByDevice[entry.Device] = []PersistedDirInfo{}
		}
		dirsByDevice[entry.Device] = append(dirsByDevice[entry.Device],
			PersistedDirInfo{P: entry.Path, T: entry.ModTime, I: entry.Inode, F: entry.FileNames})
	}

	cacheEntry := CacheEntry{}

	for device, infos := range dirsByDevice {
		// find the common prefix of all paths on this device
		prefix := ""
		if len(infos) > 0 {
			prefix = infos[0].P
		}
		for _, info := range infos {
			for !strings.HasPrefix(info.P+"/", prefix+"/") {
				prefix = filepath.Dir(prefix)
				if prefix == "/" {
					break
				}
			}
		}
		// remove the common prefix from each path
		for i := range infos {
			suffix := strings.Replace(infos[i].P, prefix, "", 1)
			if len(suffix) > 0 && suffix[0] == '/' {
				suffix = suffix[1:]
			}
			infos[i].P = suffix
		}

		// turn the map (keyed by device) into a list of structs with labeled fields
		// this is to improve readability of the output
		cacheEntry = append(cacheEntry, PersistedDirs{Device: device, Root: prefix, Dirs: infos})
	}

	// convert to json.
	// it would save some space to use a different format than json for the db file,
	// but the space and time savings are small, and json is easy for humans to read
	return json.Marshal(cacheEntry)
}

func (f *Finder) parseCacheEntry(bytes []byte) ([]dirFullInfo, error) {
	var cacheEntry CacheEntry
	err := json.Unmarshal(bytes, &cacheEntry)
	if err != nil {
		return nil, err
	}

	// convert from a CacheEntry to a []dirFullInfo (by copying a few fields)
	capacity := 0
	for _, element := range cacheEntry {
		capacity += len(element.Dirs)
	}
	nodes := make([]dirFullInfo, capacity)
	count := 0
	for _, element := range cacheEntry {
		for _, dir := range element.Dirs {
			path := joinCleanPaths(element.Root, dir.P)

			nodes[count] = dirFullInfo{
				pathAndStats: pathAndStats{
					statResponse: statResponse{
						ModTime: dir.T, Inode: dir.I, Device: element.Device,
					},
					Path: path},
				FileNames: dir.F}
			count++
		}
	}
	return nodes, nil
}

// We use the following separator byte to distinguish individually parseable blocks of json
// because we know this separator won't appear in the json that we're parsing.
//
// The newline byte can only appear in a UTF-8 stream if the newline character appears, because:
// - The newline character is encoded as "0000 1010" in binary ("0a" in hex)
// - UTF-8 dictates that bytes beginning with a "0" bit are never emitted as part of a multibyte
//   character.
//
// We know that the newline character will never appear in our json string, because:
// - If a newline character appears as part of a data string, then json encoding will
//   emit two characters instead: '\' and 'n'.
// - The json encoder that we use doesn't emit the optional newlines between any of its
//   other outputs.
const lineSeparator = byte('\n')
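
// To illustrate the escaping argument above: marshaling a string that contains
// a raw newline produces the two bytes '\' and 'n', never the byte 0x0a, e.g.
//
//	out, _ := json.Marshal("a\nb") // out == []byte(`"a\nb"`): 6 bytes, no 0x0a
//
// so any 0x0a byte in the db file must be a block separator that we wrote.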

func (f *Finder) readLine(reader *bufio.Reader) ([]byte, error) {
	return reader.ReadBytes(lineSeparator)
}

// validateCacheHeader reads the cache header from cacheReader and tells whether the cache is compatible with this Finder
func (f *Finder) validateCacheHeader(cacheReader *bufio.Reader) bool {
	cacheVersionBytes, err := f.readLine(cacheReader)
	if err != nil {
		f.verbosef("Failed to read database header; database is invalid\n")
		return false
	}
	if len(cacheVersionBytes) > 0 && cacheVersionBytes[len(cacheVersionBytes)-1] == lineSeparator {
		cacheVersionBytes = cacheVersionBytes[:len(cacheVersionBytes)-1]
	}
	cacheVersionString := string(cacheVersionBytes)
	currentVersion := f.cacheMetadata.Version
	if cacheVersionString != currentVersion {
		f.verbosef("Version changed from %q to %q, database is not applicable\n", cacheVersionString, currentVersion)
		return false
	}

	cacheParamBytes, err := f.readLine(cacheReader)
	if err != nil {
		f.verbosef("Failed to read database search params; database is invalid\n")
		return false
	}

	if len(cacheParamBytes) > 0 && cacheParamBytes[len(cacheParamBytes)-1] == lineSeparator {
		cacheParamBytes = cacheParamBytes[:len(cacheParamBytes)-1]
	}

	currentParamBytes, err := f.cacheMetadata.Config.Dump()
	if err != nil {
		panic("Finder failed to serialize its parameters")
	}
	cacheParamString := string(cacheParamBytes)
	currentParamString := string(currentParamBytes)
	if cacheParamString != currentParamString {
		f.verbosef("Params changed from %q to %q, database is not applicable\n", cacheParamString, currentParamString)
		return false
	}
	return true
}

// loadBytes compares the cache info in <data> to the state of the filesystem.
// loadBytes returns a map representing <data> and also a slice of dirs that need to be re-walked.
func (f *Finder) loadBytes(id int, data []byte) (m *pathMap, dirsToWalk []string, err error) {
	helperStartTime := time.Now()

	cachedNodes, err := f.parseCacheEntry(data)
	if err != nil {
		return nil, nil, fmt.Errorf("Failed to parse block %v: %v", id, err.Error())
	}

	unmarshalDate := time.Now()
	f.verbosef("Unmarshaled %v objects for %v in %v\n",
		len(cachedNodes), id, unmarshalDate.Sub(helperStartTime))

	tempMap := newPathMap("/")
	stats := make([]statResponse, len(cachedNodes))

	for i, node := range cachedNodes {
		// check the file system for an updated timestamp
		stats[i] = f.statDirSync(node.Path)
	}

	dirsToWalk = []string{}
	for i, cachedNode := range cachedNodes {
		updated := stats[i]
		// save the cached value
		container := tempMap.GetNode(cachedNode.Path, true)
		container.mapNode = mapNode{statResponse: updated}

		// if the metadata changed and the directory still exists, then
		// make a note to walk it later
		if !f.isInfoUpToDate(cachedNode.statResponse, updated) && updated.ModTime != 0 {
			f.setModified()
			// make a note that the directory needs to be walked
			dirsToWalk = append(dirsToWalk, cachedNode.Path)
		} else {
			container.mapNode.FileNames = cachedNode.FileNames
		}
	}
	// count the number of nodes to improve our understanding of the shape of the tree,
	// thereby improving parallelism of subsequent searches
	tempMap.UpdateNumDescendentsRecursive()

	f.verbosef("Statted inodes of block %v in %v\n", id, time.Since(unmarshalDate))
	return tempMap, dirsToWalk, nil
}

// startFromExternalCache loads the cache database from disk.
// startFromExternalCache waits to return until the load of the cache db is complete, but
// it does not wait for every listDir() or statDir() request to complete.
func (f *Finder) startFromExternalCache() (err error) {
	startTime := time.Now()
	dbPath := f.DbPath

	// open cache file and validate its header
	reader, err := f.filesystem.Open(dbPath)
	if err != nil {
		return errors.New("no data to load from database")
	}
	bufferedReader := bufio.NewReader(reader)
	if !f.validateCacheHeader(bufferedReader) {
		return errors.New("cache header does not match")
	}
	f.verbosef("Database header matches, will attempt to use database %v\n", f.DbPath)

	// read the file and spawn threads to process it
	nodesToWalk := [][]*pathMap{}
	mainTree := newPathMap("/")

	// read the blocks and stream them into <blockChannel>
	type dataBlock struct {
		id   int
		err  error
		data []byte
	}
	blockChannel := make(chan dataBlock, f.numDbLoadingThreads)
	readBlocks := func() {
		index := 0
		for {
			// It takes some time to unmarshal the input from json, so we want
			// to unmarshal it in parallel. In order to find valid places to
			// break the input, we scan for the line separators that we inserted
			// (for this purpose) when we dumped the database.
			data, err := f.readLine(bufferedReader)
			var response dataBlock
			done := false
			if err != nil && err != io.EOF {
				response = dataBlock{id: index, err: err, data: nil}
				done = true
			} else {
				done = (err == io.EOF)
				response = dataBlock{id: index, err: nil, data: data}
			}
			blockChannel <- response
			index++
			duration := time.Since(startTime)
			f.verbosef("Read block %v after %v\n", index, duration)
			if done {
				f.verbosef("Read %v blocks in %v\n", index, duration)
				close(blockChannel)
				return
			}
		}
	}
	go readBlocks()

	// Read from <blockChannel> and stream the responses into <resultChannel>.
	type workResponse struct {
		id          int
		err         error
		tree        *pathMap
		updatedDirs []string
	}
	resultChannel := make(chan workResponse)
	processBlocks := func() {
		numProcessed := 0
		threadPool := newThreadPool(f.numDbLoadingThreads)
		for {
			// get a block to process
			block, received := <-blockChannel
			if !received {
				break
			}

			if block.err != nil {
				resultChannel <- workResponse{err: block.err}
				break
			}
			numProcessed++
			// wait until there is CPU available to process it
			threadPool.Run(
				func() {
					processStartTime := time.Now()
					f.verbosef("Starting to process block %v after %v\n",
						block.id, processStartTime.Sub(startTime))
					tempMap, updatedDirs, err := f.loadBytes(block.id, block.data)
					var response workResponse
					if err != nil {
						f.verbosef(
							"Block %v failed to parse with error %v\n",
							block.id, err)
						response = workResponse{err: err}
					} else {
						response = workResponse{
							id:          block.id,
							err:         nil,
							tree:        tempMap,
							updatedDirs: updatedDirs,
						}
					}
					f.verbosef("Processed block %v in %v\n",
						block.id, time.Since(processStartTime),
					)
					resultChannel <- response
				},
			)
		}
		threadPool.Wait()
		f.verbosef("Finished processing %v blocks in %v\n",
			numProcessed, time.Since(startTime))
		close(resultChannel)
	}
	go processBlocks()

	// Read from <resultChannel> and use the results
	combineResults := func() (err error) {
		for {
			result, received := <-resultChannel
			if !received {
				break
			}
			if err != nil {
				// In case of an error, wait for work to complete before
				// returning the error. This ensures that any subsequent
				// work doesn't need to compete for resources (and possibly
				// fail due to, for example, a filesystem limit on the number of
				// concurrently open files) with past work.
				continue
			}
			if result.err != nil {
				err = result.err
				continue
			}
			// update main tree
			mainTree.MergeIn(result.tree)
			// record any new directories that we will need to Stat()
			updatedNodes := make([]*pathMap, len(result.updatedDirs))
			for j, dir := range result.updatedDirs {
				node := mainTree.GetNode(dir, false)
				updatedNodes[j] = node
			}
			nodesToWalk = append(nodesToWalk, updatedNodes)
		}
		return err
	}
	err = combineResults()
	if err != nil {
		return err
	}

	f.nodes = *mainTree

	// after having loaded the entire db and therefore created entries for
	// the directories we know of, now it's safe to start calling ReadDir on
	// any updated directories
	for i := range nodesToWalk {
		f.listDirsAsync(nodesToWalk[i])
	}
	f.verbosef("Loaded db and statted known dirs in %v\n", time.Since(startTime))
	f.threadPool.Wait()
	f.verbosef("Loaded db and statted all dirs in %v\n", time.Since(startTime))

	return err
}

// startWithoutExternalCache starts scanning the filesystem according to the cache config.
// startWithoutExternalCache should be called if startFromExternalCache is not applicable.
func (f *Finder) startWithoutExternalCache() {
	startTime := time.Now()
	configDirs := f.cacheMetadata.Config.RootDirs

	// clean paths
	candidates := make([]string, len(configDirs))
	for i, dir := range configDirs {
		candidates[i] = filepath.Clean(dir)
	}
	// remove duplicates and any dirs already covered by another dir
	dirsToScan := make([]string, 0, len(configDirs))
	for _, candidate := range candidates {
		include := true
		for _, included := range dirsToScan {
			if included == "/" || strings.HasPrefix(candidate+"/", included+"/") {
				include = false
				break
			}
		}
		if include {
			dirsToScan = append(dirsToScan, candidate)
		}
	}

	// finally, start the search
	for _, path := range dirsToScan {
		f.verbosef("Starting find of %v\n", path)
		f.startFind(path)
	}

	f.threadPool.Wait()

	f.verbosef("Scanned filesystem (not using cache) in %v\n", time.Since(startTime))
}
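
// For example (hypothetical values): with RootDirs = ["/a", "/a/b", "/c", "/a"],
// the cleaning and deduplication above yield dirsToScan = ["/a", "/c"], since
// "/a/b" is already covered by "/a" and the duplicate "/a" is dropped.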

// isInfoUpToDate tells whether <new> can confirm that results computed at <old> are still valid
func (f *Finder) isInfoUpToDate(old statResponse, new statResponse) (equal bool) {
	if old.Inode != new.Inode {
		return false
	}
	if old.ModTime != new.ModTime {
		return false
	}
	if old.Device != new.Device {
		return false
	}
	return true
}

func (f *Finder) wasModified() bool {
	return atomic.LoadInt32(&f.modifiedFlag) > 0
}

func (f *Finder) setModified() {
	atomic.StoreInt32(&f.modifiedFlag, 1)
}

// sortedDirEntries exports directory entries to facilitate dumping them to the external cache
func (f *Finder) sortedDirEntries() []dirFullInfo {
	startTime := time.Now()
	nodes := make([]dirFullInfo, 0)
	for _, node := range f.nodes.DumpAll() {
		if node.ModTime != 0 {
			nodes = append(nodes, node)
		}
	}
	discoveryDate := time.Now()
	f.verbosef("Generated %v cache entries in %v\n", len(nodes), discoveryDate.Sub(startTime))
	less := func(i int, j int) bool {
		return nodes[i].Path < nodes[j].Path
	}
	sort.Slice(nodes, less)
	sortDate := time.Now()
	f.verbosef("Sorted %v cache entries in %v\n", len(nodes), sortDate.Sub(discoveryDate))

	return nodes
}

// serializeDb converts the cache database into a form to save to disk
func (f *Finder) serializeDb() ([]byte, error) {
	// sort dir entries
	var entryList = f.sortedDirEntries()

	// Generate an output file that can be conveniently loaded using the same number of threads
	// as were used in this execution (because presumably that will be the number of threads
	// used in the next execution too)

	// generate header
	header := []byte{}
	header = append(header, []byte(f.cacheMetadata.Version)...)
	header = append(header, lineSeparator)
	configDump, err := f.cacheMetadata.Config.Dump()
	if err != nil {
		return nil, err
	}
	header = append(header, configDump...)

	// serialize individual blocks in parallel
	numBlocks := f.numDbLoadingThreads
	if numBlocks > len(entryList) {
		numBlocks = len(entryList)
	}
	blocks := make([][]byte, 1+numBlocks)
	blocks[0] = header
	blockMin := 0
	wg := sync.WaitGroup{}
	var errLock sync.Mutex

	for i := 1; i <= numBlocks; i++ {
		// identify the next block
		blockMax := len(entryList) * i / numBlocks
		block := entryList[blockMin:blockMax]

		// process the block
		wg.Add(1)
		go func(index int, block []dirFullInfo) {
			byteBlock, subErr := f.serializeCacheEntry(block)
			f.verbosef("Serialized block %v into %v bytes\n", index, len(byteBlock))
			if subErr != nil {
				f.verbosef("%v\n", subErr.Error())
				errLock.Lock()
				err = subErr
				errLock.Unlock()
			} else {
				blocks[index] = byteBlock
			}
			wg.Done()
		}(i, block)

		blockMin = blockMax
	}

	wg.Wait()

	if err != nil {
		return nil, err
	}

	content := bytes.Join(blocks, []byte{lineSeparator})

	return content, nil
}

// dumpDb saves the cache database to disk
func (f *Finder) dumpDb() error {
	startTime := time.Now()
	f.verbosef("Dumping db\n")

	tempPath := f.DbPath + ".tmp"

	data, err := f.serializeDb()
	if err != nil {
		return err
	}
	serializeDate := time.Now()
	f.verbosef("Serialized db in %v\n", serializeDate.Sub(startTime))
	// dump file and atomically move
	err = f.filesystem.WriteFile(tempPath, data, 0777)
	if err != nil {
		return err
	}
	err = f.filesystem.Rename(tempPath, f.DbPath)
	if err != nil {
		return err
	}

	f.verbosef("Wrote db in %v\n", time.Since(serializeDate))
	return nil
}

// canIgnoreFsErr checks for certain classes of filesystem errors that are safe to ignore
func (f *Finder) canIgnoreFsErr(err error) bool {
	pathErr, isPathErr := err.(*os.PathError)
	if !isPathErr {
		// Don't recognize this error
		return false
	}
	if os.IsPermission(pathErr) {
		// Permission errors are ignored:
		// https://issuetracker.google.com/37553659
		// https://github.com/google/kati/pull/116
		return true
	}
	if pathErr.Err == os.ErrNotExist {
		// If a directory doesn't exist, that generally means the cache is out-of-date
		return true
	}
	// Don't recognize this error
	return false
}

// onFsError should be called whenever a potentially fatal error is returned from a filesystem call
func (f *Finder) onFsError(path string, err error) {
	if !f.canIgnoreFsErr(err) {
		// We could send the errors through a channel instead, although that would cause this call
		// to block unless we preallocated a sufficient buffer or spawned a reader thread.
		// Although it wouldn't be too complicated to spawn a reader thread, it's still slightly
		// more convenient to use a lock. Only in an unusual situation should this code be
		// invoked anyway.
		f.errlock.Lock()
		f.fsErrs = append(f.fsErrs, fsErr{path: path, err: err})
		f.errlock.Unlock()
	}
}

// discardErrsForPrunedPaths removes any errors for paths that are no longer included in the cache
func (f *Finder) discardErrsForPrunedPaths() {
	// This function could be somewhat inefficient due to being single-threaded,
	// but the length of f.fsErrs should be approximately 0, so it shouldn't take long anyway.
	relevantErrs := make([]fsErr, 0, len(f.fsErrs))
	for _, fsErr := range f.fsErrs {
		path := fsErr.path
		node := f.nodes.GetNode(path, false)
		if node != nil {
			// The path in question wasn't pruned due to a failure to process a parent directory.
			// So, the failure to process this path is important
			relevantErrs = append(relevantErrs, fsErr)
		}
	}
	f.fsErrs = relevantErrs
}

// getErr returns an error based on previous calls to onFsError, if any
func (f *Finder) getErr() error {
	f.discardErrsForPrunedPaths()

	numErrs := len(f.fsErrs)
	if numErrs < 1 {
		return nil
	}

	maxNumErrsToInclude := 10
	message := ""
	if numErrs > maxNumErrsToInclude {
		message = fmt.Sprintf("finder encountered %v errors: %v...", numErrs, f.fsErrs[:maxNumErrsToInclude])
	} else {
		message = fmt.Sprintf("finder encountered %v errors: %v", numErrs, f.fsErrs)
	}

	return errors.New(message)
}

func (f *Finder) statDirAsync(dir *pathMap) {
	node := dir
	path := dir.path
	f.threadPool.Run(
		func() {
			updatedStats := f.statDirSync(path)

			if !f.isInfoUpToDate(node.statResponse, updatedStats) {
				node.mapNode = mapNode{
					statResponse: updatedStats,
					FileNames:    []string{},
				}
				f.setModified()
				if node.statResponse.ModTime != 0 {
					// modification time was updated, so re-scan for
					// child directories
					f.listDirAsync(dir)
				}
			}
		},
	)
}

func (f *Finder) statDirSync(path string) statResponse {
	fileInfo, err := f.filesystem.Lstat(path)

	var stats statResponse
	if err != nil {
		// possibly record this error
		f.onFsError(path, err)
		// in case of a failure to stat the directory, treat the directory as missing (modTime = 0)
		return stats
	}
	modTime := fileInfo.ModTime()
	stats = statResponse{}
	inode, err := f.filesystem.InodeNumber(fileInfo)
	if err != nil {
		panic(fmt.Sprintf("Could not get inode number of %v: %v\n", path, err.Error()))
	}
	stats.Inode = inode
	device, err := f.filesystem.DeviceNumber(fileInfo)
	if err != nil {
		panic(fmt.Sprintf("Could not get device number of %v: %v\n", path, err.Error()))
	}
	stats.Device = device
	permissionsChangeTime, err := f.filesystem.PermTime(fileInfo)
	if err != nil {
		panic(fmt.Sprintf("Could not get permissions modification time (CTime) of %v: %v\n", path, err.Error()))
	}
	// We're only interested in knowing whether anything about the directory
	// has changed since last check, so we use the latest of the two
	// modification times (content modification (mtime) and
	// permission modification (ctime))
	if permissionsChangeTime.After(modTime) {
		modTime = permissionsChangeTime
	}
	stats.ModTime = modTime.UnixNano()

	return stats
}

// pruneCacheCandidates removes the items that we don't want to include in our persistent cache
func (f *Finder) pruneCacheCandidates(items *DirEntries) {
	for _, fileName := range items.FileNames {
		for _, abortedName := range f.cacheMetadata.Config.PruneFiles {
			if fileName == abortedName {
				items.FileNames = []string{}
				items.DirNames = []string{}
				return
			}
		}
	}

	// remove any files that aren't the ones we want to include
	writeIndex := 0
	for _, fileName := range items.FileNames {
		// include only these files
		for _, includedName := range f.cacheMetadata.Config.IncludeFiles {
			if fileName == includedName {
				items.FileNames[writeIndex] = fileName
				writeIndex++
				break
			}
		}
	}
	// resize
	items.FileNames = items.FileNames[:writeIndex]

	writeIndex = 0
	for _, dirName := range items.DirNames {
		items.DirNames[writeIndex] = dirName
		// ignore other dirs that are known to not be inputs to the build process
		include := true
		for _, excludedName := range f.cacheMetadata.Config.ExcludeDirs {
			if dirName == excludedName {
				// don't include
				include = false
				break
			}
		}
		if include {
			writeIndex++
		}
	}
	// resize
	items.DirNames = items.DirNames[:writeIndex]
}

func (f *Finder) listDirsAsync(nodes []*pathMap) {
	f.threadPool.Run(
		func() {
			for i := range nodes {
				f.listDirSync(nodes[i])
			}
		},
	)
}

func (f *Finder) listDirAsync(node *pathMap) {
	f.threadPool.Run(
		func() {
			f.listDirSync(node)
		},
	)
}

func (f *Finder) listDirSync(dir *pathMap) {
	path := dir.path
	children, err := f.filesystem.ReadDir(path)

	if err != nil {
		// possibly record this error
		f.onFsError(path, err)
		// if listing the contents of the directory fails (presumably due to
		// permission denied), then treat the directory as empty
		children = nil
	}

	var subdirs []string
	var subfiles []string

	for _, child := range children {
		linkBits := child.Mode() & os.ModeSymlink
		isLink := linkBits != 0
		if child.IsDir() {
			if !isLink {
				// Skip symlink dirs.
				// We don't have to support symlink dirs because
				// that would cause duplicates.
				subdirs = append(subdirs, child.Name())
			}
		} else {
			// We do have to support symlink files because the link name might be
			// different from the target name
			// (for example, Android.bp -> build/soong/root.bp)
			subfiles = append(subfiles, child.Name())
		}
	}
	parentNode := dir

	entry := &DirEntries{Path: path, DirNames: subdirs, FileNames: subfiles}
	f.pruneCacheCandidates(entry)

	// create a pathMap node for each relevant subdirectory
	relevantChildren := map[string]*pathMap{}
	for _, subdirName := range entry.DirNames {
		childNode, found := parentNode.children[subdirName]
		// if we already knew of this directory, then we already have a request pending to Stat it
		// if we didn't already know of this directory, then we must Stat it now
		if !found {
			childNode = parentNode.newChild(subdirName)
			f.statDirAsync(childNode)
		}
		relevantChildren[subdirName] = childNode
	}
	// Note that in rare cases, it's possible that we're reducing the set of
	// children via this statement, if these are all true:
	// 1. we previously had a cache that knew about subdirectories of parentNode
	// 2. the user created a prune-file (described in pruneCacheCandidates)
	//    inside <parentNode>, which specifies that the contents of parentNode
	//    are to be ignored.
	// The fact that it's possible to remove children here means that *pathMap structs
	// must not be looked up from f.nodes by filepath (and instead must be accessed by
	// direct pointer) until after every listDirSync completes.
	parentNode.FileNames = entry.FileNames
	parentNode.children = relevantChildren
}

// listMatches takes a node and a function that specifies which subdirectories and
// files to include, and listMatches returns the matches
func (f *Finder) listMatches(node *pathMap,
	filter WalkFunc) (subDirs []*pathMap, filePaths []string) {
	entries := DirEntries{
		FileNames: node.FileNames,
	}
	entries.DirNames = make([]string, 0, len(node.children))
	for childName := range node.children {
		entries.DirNames = append(entries.DirNames, childName)
	}

	dirNames, fileNames := filter(entries)

	filePaths = make([]string, 0, len(fileNames))
	for _, fileName := range fileNames {
		filePaths = append(filePaths, joinCleanPaths(node.path, fileName))
	}
	subDirs = make([]*pathMap, 0, len(dirNames))
	for _, childName := range dirNames {
		child, ok := node.children[childName]
		if ok {
			subDirs = append(subDirs, child)
		}
	}

	return subDirs, filePaths
}

// findInCacheMultithreaded spawns potentially multiple goroutines with which to search the cache.
func (f *Finder) findInCacheMultithreaded(node *pathMap, filter WalkFunc,
	approxNumThreads int) []string {

	if approxNumThreads < 2 {
		// Done spawning threads; process remaining directories
		return f.findInCacheSinglethreaded(node, filter)
	}

	totalWork := 0
	for _, child := range node.children {
		totalWork += child.approximateNumDescendents
	}
	childrenResults := make(chan []string, len(node.children))

	subDirs, filePaths := f.listMatches(node, filter)

	// process child directories
	for _, child := range subDirs {
		numChildThreads := approxNumThreads * child.approximateNumDescendents / totalWork
		childProcessor := func(child *pathMap) {
			childResults := f.findInCacheMultithreaded(child, filter, numChildThreads)
			childrenResults <- childResults
		}
		// If we're allowed to use more than 1 thread to process this directory,
		// then instead we use 1 thread for each subdirectory.
		// It would be strange to spawn threads for only some subdirectories.
		go childProcessor(child)
	}

	// collect results
	for i := 0; i < len(subDirs); i++ {
		childResults := <-childrenResults
		filePaths = append(filePaths, childResults...)
	}
	close(childrenResults)

	return filePaths
}
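
// As a worked example of the thread allocation above (hypothetical numbers):
// with approxNumThreads = 8 and two children having approximateNumDescendents
// of 300 and 100, totalWork = 400, so the children are searched with
// 8*300/400 = 6 and 8*100/400 = 2 threads respectively.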

// findInCacheSinglethreaded synchronously searches the cache for all matching file paths
// note findInCacheSinglethreaded runs 2X to 4X as fast by being iterative rather than recursive
func (f *Finder) findInCacheSinglethreaded(node *pathMap, filter WalkFunc) []string {
	if node == nil {
		return []string{}
	}

	nodes := []*pathMap{node}
	matches := []string{}

	for len(nodes) > 0 {
		currentNode := nodes[0]
		nodes = nodes[1:]

		subDirs, filePaths := f.listMatches(currentNode, filter)

		nodes = append(nodes, subDirs...)

		matches = append(matches, filePaths...)
	}
	return matches
}