• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
18	"crypto/sha256"
19	"encoding/base64"
20	"fmt"
21	"path/filepath"
22	"reflect"
23	"sort"
24	"strings"
25	"sync"
26
27	analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"
28
29	"github.com/google/blueprint/metrics"
30	"github.com/google/blueprint/proptools"
31	"google.golang.org/protobuf/proto"
32)
33
// Integer identifier types used to cross-reference entries of Bazel's aquery
// response. Each kind of entry has its own type so that identifiers of
// different kinds are not interchangeable without an explicit conversion.
type (
	// artifactId identifies an artifact.
	artifactId int
	// depsetId identifies a depset of files.
	depsetId int
	// pathFragmentId identifies a path fragment.
	pathFragmentId int
)
37
// artifact contains relevant portions of Bazel's aquery proto, Artifact.
// Represents a single artifact, whether it's a source file or a derived output file.
//
// Fields:
//   - Id: identifier of this artifact.
//   - PathFragmentId: identifier of the path fragment for this artifact's path
//     (presumably the leaf fragment, as with the proto Artifact handled in
//     newAqueryHandler).
type artifact struct {
	Id             artifactId
	PathFragmentId pathFragmentId
}
44
// pathFragment mirrors the portions of a path fragment entry that are needed to
// rebuild a full path.
//
// Fields:
//   - Id: identifier of this fragment.
//   - Label: the text contributed by this fragment (presumably a single path
//     component, as with the proto PathFragment consumed by expandPathFragment).
//   - ParentId: identifier of the parent fragment (presumably zero when there
//     is no parent, matching expandPathFragment's handling of the proto type).
type pathFragment struct {
	Id       pathFragmentId
	Label    string
	ParentId pathFragmentId
}
50
// KeyValuePair represents Bazel's aquery proto, KeyValuePair.
// In this file it is the element type of the action struct's
// EnvironmentVariables and Substitutions fields.
type KeyValuePair struct {
	Key   string
	Value string
}
56
// AqueryDepset is a depset definition from Bazel's aquery response. This is
// akin to the `depSetOfFiles` in the response proto, except:
//   - direct artifacts are enumerated by full path instead of by ID
//   - it has a hash of the depset contents, instead of an int ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A single depset consists of artifact paths and "child" depsets;
// either list may be empty, but populateDepsetMaps never creates an
// AqueryDepset in which both are empty.
//
// Fields:
//   - ContentHash: the value of depsetContentHash over the two lists below.
//   - DirectArtifacts: full paths of the artifacts contained directly in this depset.
//   - TransitiveDepSetHashes: ContentHash values of the child depsets.
type AqueryDepset struct {
	ContentHash            string
	DirectArtifacts        []string
	TransitiveDepSetHashes []string
}
70
// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
// data structure for storing large numbers of file paths.
//
// Fields:
//   - Id: identifier of this depset.
//   - DirectArtifactIds: identifiers of the artifacts contained directly in this depset.
//   - TransitiveDepSetIds: identifiers of the child depsets.
type depSetOfFiles struct {
	Id                  depsetId
	DirectArtifactIds   []artifactId
	TransitiveDepSetIds []depsetId
}
79
// action contains relevant portions of Bazel's aquery proto, Action.
// Represents a single command line invocation in the Bazel build graph.
//
// Fields:
//   - Arguments: the command line of the action.
//   - EnvironmentVariables: environment entries as key/value pairs.
//   - InputDepSetIds: identifiers of the depsets holding the action's inputs.
//   - Mnemonic: the kind of action (for example "Middleman" or "TemplateExpand").
//   - OutputIds: identifiers of the artifacts the action produces.
//   - TemplateContent, Substitutions: template text and token replacements
//     (presumably only meaningful for TemplateExpand actions).
//   - FileContents: file content (presumably only meaningful for file-writing actions).
type action struct {
	Arguments            []string
	EnvironmentVariables []KeyValuePair
	InputDepSetIds       []depsetId
	Mnemonic             string
	OutputIds            []artifactId
	TemplateContent      string
	Substitutions        []KeyValuePair
	FileContents         string
}
92
// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
// An aquery response from Bazel contains a single ActionGraphContainer proto.
// It groups the artifacts, actions, depsets and path fragments of one
// response; entries refer to each other by identifier.
type actionGraphContainer struct {
	Artifacts     []artifact
	Actions       []action
	DepSetOfFiles []depSetOfFiles
	PathFragments []pathFragment
}
101
// BuildStatement contains information to register a build statement corresponding (one to one)
// with a Bazel action from Bazel's action graph.
//
// Fields:
//   - Command: the shell command to run; left empty by the constructors in this
//     file for file-write and symlink-tree actions.
//   - Depfile: path of the action output whose extension is ".d", or nil when
//     the action has no such output.
//   - OutputPaths: paths of all other outputs of the action.
//   - SymlinkPaths: set only by symlinkActionBuildStatement, to the action's outputs.
//   - Env: the action's environment variables, taken unchanged from the aquery proto.
//   - Mnemonic: the Bazel mnemonic of the action.
//   - FileContents: set only by fileWriteActionBuildStatement, from the action's
//     FileContents.
type BuildStatement struct {
	Command      string
	Depfile      *string
	OutputPaths  []string
	SymlinkPaths []string
	Env          []*analysis_v2_proto.KeyValuePair
	Mnemonic     string

	// Inputs of this build statement, either as unexpanded depsets or expanded
	// input paths. There should be no overlap between these fields; an input
	// path should either be included as part of an unexpanded depset or a raw
	// input path string, but not both.
	InputDepsetHashes []string
	InputPaths        []string
	FileContents      string
}
120
// A helper type for aquery processing which facilitates retrieval of path IDs from their
// less readable Bazel structures (depset and path fragment).
// Instances are built by newAqueryHandler. The maps are filled during
// construction; the sync.Map cache is filled lazily by
// artifactPathsFromDepsetHash.
type aqueryArtifactHandler struct {
	// depsetIdToAqueryDepset maps depset id to AqueryDepset, a representation
	// of depset which is post-processed for middleman artifact handling,
	// unhandled artifact dropping, content hashing, etc.
	// emptyDepsetIds is the set of depset ids whose post-processed form has
	// neither direct artifacts nor child depsets; such depsets have no entry
	// in depsetIdToAqueryDepset.
	depsetIdToAqueryDepset map[depsetId]AqueryDepset
	emptyDepsetIds         map[depsetId]struct{}
	// Maps content hash to AqueryDepset.
	depsetHashToAqueryDepset map[string]AqueryDepset

	// depsetHashToArtifactPathsCache is a memoization of depset flattening
	// (content hash -> []string of all transitive artifact paths), because
	// flattening may be an expensive operation.
	depsetHashToArtifactPathsCache sync.Map
	// Maps artifact ids to fully expanded paths.
	artifactIdToPath map[artifactId]string
}
138
// templateActionOverriddenTokens lists template tokens whose replacement is
// fixed here. The tokens should be substituted with the value specified here,
// instead of the one returned in 'substitutions' of TemplateExpand action.
// expandTemplateContent consults this map for every substitution key.
var templateActionOverriddenTokens = map[string]string{
	// Uses "python3" for %python_binary% instead of the value returned by aquery
	// which is "py3wrapper.sh". See removePy3wrapperScript.
	"%python_binary%": "python3",
}
146
const (
	// middlemanMnemonic is the mnemonic of middleman actions. Their outputs are
	// replaced by their input depsets (see newAqueryHandler) and they produce
	// no build statement of their own (see actionToBuildStatement).
	middlemanMnemonic = "Middleman"
	// The file name of py3wrapper.sh, which is used by py_binary targets.
	// It includes the leading "/" because it is matched as a path suffix when
	// dropping depset inputs in populateDepsetMaps.
	py3wrapperFileName = "/py3wrapper.sh"
)
152
// indexBy returns a map holding every element of values under the key that
// keyFn computes for it. When several elements yield the same key, the one
// that appears last in values is kept. The returned map is never nil.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	indexed := make(map[K]V, len(values))
	for i := range values {
		indexed[keyFn(values[i])] = values[i]
	}
	return indexed
}
160
161func newAqueryHandler(aqueryResult *analysis_v2_proto.ActionGraphContainer) (*aqueryArtifactHandler, error) {
162	pathFragments := indexBy(aqueryResult.PathFragments, func(pf *analysis_v2_proto.PathFragment) pathFragmentId {
163		return pathFragmentId(pf.Id)
164	})
165
166	artifactIdToPath := make(map[artifactId]string, len(aqueryResult.Artifacts))
167	for _, artifact := range aqueryResult.Artifacts {
168		artifactPath, err := expandPathFragment(pathFragmentId(artifact.PathFragmentId), pathFragments)
169		if err != nil {
170			return nil, err
171		}
172		artifactIdToPath[artifactId(artifact.Id)] = artifactPath
173	}
174
175	// Map middleman artifact ContentHash to input artifact depset ID.
176	// Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
177	// if we find a middleman action which has inputs [foo, bar], and output [baz_middleman], then,
178	// for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
179	// that action instead.
180	middlemanIdToDepsetIds := map[artifactId][]uint32{}
181	for _, actionEntry := range aqueryResult.Actions {
182		if actionEntry.Mnemonic == middlemanMnemonic {
183			for _, outputId := range actionEntry.OutputIds {
184				middlemanIdToDepsetIds[artifactId(outputId)] = actionEntry.InputDepSetIds
185			}
186		}
187	}
188
189	depsetIdToDepset := indexBy(aqueryResult.DepSetOfFiles, func(d *analysis_v2_proto.DepSetOfFiles) depsetId {
190		return depsetId(d.Id)
191	})
192
193	aqueryHandler := aqueryArtifactHandler{
194		depsetIdToAqueryDepset:         map[depsetId]AqueryDepset{},
195		depsetHashToAqueryDepset:       map[string]AqueryDepset{},
196		depsetHashToArtifactPathsCache: sync.Map{},
197		emptyDepsetIds:                 make(map[depsetId]struct{}, 0),
198		artifactIdToPath:               artifactIdToPath,
199	}
200
201	// Validate and adjust aqueryResult.DepSetOfFiles values.
202	for _, depset := range aqueryResult.DepSetOfFiles {
203		_, err := aqueryHandler.populateDepsetMaps(depset, middlemanIdToDepsetIds, depsetIdToDepset)
204		if err != nil {
205			return nil, err
206		}
207	}
208
209	return &aqueryHandler, nil
210}
211
212// Ensures that the handler's depsetIdToAqueryDepset map contains an entry for the given
213// depset.
214func (a *aqueryArtifactHandler) populateDepsetMaps(depset *analysis_v2_proto.DepSetOfFiles, middlemanIdToDepsetIds map[artifactId][]uint32, depsetIdToDepset map[depsetId]*analysis_v2_proto.DepSetOfFiles) (*AqueryDepset, error) {
215	if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[depsetId(depset.Id)]; containsDepset {
216		return &aqueryDepset, nil
217	}
218	transitiveDepsetIds := depset.TransitiveDepSetIds
219	directArtifactPaths := make([]string, 0, len(depset.DirectArtifactIds))
220	for _, id := range depset.DirectArtifactIds {
221		aId := artifactId(id)
222		path, pathExists := a.artifactIdToPath[aId]
223		if !pathExists {
224			return nil, fmt.Errorf("undefined input artifactId %d", aId)
225		}
226		// Filter out any inputs which are universally dropped, and swap middleman
227		// artifacts with their corresponding depsets.
228		if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[aId]; isMiddleman {
229			// Swap middleman artifacts with their corresponding depsets and drop the middleman artifacts.
230			transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
231		} else if strings.HasSuffix(path, py3wrapperFileName) ||
232			strings.HasPrefix(path, "../bazel_tools") {
233			continue
234			// Drop these artifacts.
235			// See go/python-binary-host-mixed-build for more details.
236			// 1) Drop py3wrapper.sh, just use python binary, the launcher script generated by the
237			// TemplateExpandAction handles everything necessary to launch a Pythin application.
238			// 2) ../bazel_tools: they have MODIFY timestamp 10years in the future and would cause the
239			// containing depset to always be considered newer than their outputs.
240		} else {
241			directArtifactPaths = append(directArtifactPaths, path)
242		}
243	}
244
245	childDepsetHashes := make([]string, 0, len(transitiveDepsetIds))
246	for _, id := range transitiveDepsetIds {
247		childDepsetId := depsetId(id)
248		childDepset, exists := depsetIdToDepset[childDepsetId]
249		if !exists {
250			if _, empty := a.emptyDepsetIds[childDepsetId]; empty {
251				continue
252			} else {
253				return nil, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
254			}
255		}
256		if childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset); err != nil {
257			return nil, err
258		} else if childAqueryDepset == nil {
259			continue
260		} else {
261			childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
262		}
263	}
264	if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
265		a.emptyDepsetIds[depsetId(depset.Id)] = struct{}{}
266		return nil, nil
267	}
268	aqueryDepset := AqueryDepset{
269		ContentHash:            depsetContentHash(directArtifactPaths, childDepsetHashes),
270		DirectArtifacts:        directArtifactPaths,
271		TransitiveDepSetHashes: childDepsetHashes,
272	}
273	a.depsetIdToAqueryDepset[depsetId(depset.Id)] = aqueryDepset
274	a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
275	return &aqueryDepset, nil
276}
277
278// getInputPaths flattens the depsets of the given IDs and returns all transitive
279// input paths contained in these depsets.
280// This is a potentially expensive operation, and should not be invoked except
281// for actions which need specialized input handling.
282func (a *aqueryArtifactHandler) getInputPaths(depsetIds []uint32) ([]string, error) {
283	var inputPaths []string
284
285	for _, id := range depsetIds {
286		inputDepSetId := depsetId(id)
287		depset := a.depsetIdToAqueryDepset[inputDepSetId]
288		inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
289		if err != nil {
290			return nil, err
291		}
292		for _, inputPath := range inputArtifacts {
293			inputPaths = append(inputPaths, inputPath)
294		}
295	}
296
297	return inputPaths, nil
298}
299
300func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
301	if result, exists := a.depsetHashToArtifactPathsCache.Load(depsetHash); exists {
302		return result.([]string), nil
303	}
304	if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
305		result := depset.DirectArtifacts
306		for _, childHash := range depset.TransitiveDepSetHashes {
307			childArtifactIds, err := a.artifactPathsFromDepsetHash(childHash)
308			if err != nil {
309				return nil, err
310			}
311			result = append(result, childArtifactIds...)
312		}
313		a.depsetHashToArtifactPathsCache.Store(depsetHash, result)
314		return result, nil
315	} else {
316		return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
317	}
318}
319
// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
// which should be registered (and output to a ninja file) to correspond with Bazel's
// action graph, as described by the given serialized action graph proto
// (aqueryJsonProto is decoded with proto.Unmarshal into an ActionGraphContainer).
// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
// are one-to-one with Bazel's depSetOfFiles objects (depsets with identical
// content hashes are reported once; empty depsets are not reported).
//
// The returned BuildStatement slice has one element per action; actions for
// which actionToBuildStatement yields no statement are represented by nil
// entries, which are sorted to the end of the slice.
//
// An error is returned if the proto cannot be decoded, if the handler cannot
// be built, if any action cannot be converted, or if two depsets with
// different contents share a content hash.
func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]*BuildStatement, []AqueryDepset, error) {
	aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
	err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
	if err != nil {
		return nil, nil, err
	}

	var aqueryHandler *aqueryArtifactHandler
	{
		eventHandler.Begin("init_handler")
		// NOTE: defer is function-scoped, so this End call (like the ones in
		// the blocks below) runs when AqueryBuildStatements returns, not at
		// the closing brace of this block.
		defer eventHandler.End("init_handler")
		aqueryHandler, err = newAqueryHandler(aqueryProto)
		if err != nil {
			return nil, nil, err
		}
	}

	// allocate both length and capacity so each goroutine can write to an index independently without
	// any need for synchronization for slice access.
	buildStatements := make([]*BuildStatement, len(aqueryProto.Actions))
	{
		eventHandler.Begin("build_statements")
		defer eventHandler.End("build_statements")
		wg := sync.WaitGroup{}
		// errOnce guards the write to err so that only one failing goroutine
		// records its error; err is read only after wg.Wait().
		var errOnce sync.Once

		// One goroutine is started per action.
		for i, actionEntry := range aqueryProto.Actions {
			wg.Add(1)
			go func(i int, actionEntry *analysis_v2_proto.Action) {
				buildStatement, aErr := aqueryHandler.actionToBuildStatement(actionEntry)
				if aErr != nil {
					errOnce.Do(func() {
						err = aErr
					})
				} else {
					// set build statement at an index rather than appending such that each goroutine does not
					// impact other goroutines
					buildStatements[i] = buildStatement
				}
				wg.Done()
			}(i, actionEntry)
		}
		wg.Wait()
	}
	if err != nil {
		return nil, nil, err
	}

	// Deduplicate depsets by content hash; several depset ids may map to
	// depsets with identical contents.
	depsetsByHash := map[string]AqueryDepset{}
	depsets := make([]AqueryDepset, 0, len(aqueryHandler.depsetIdToAqueryDepset))
	{
		eventHandler.Begin("depsets")
		defer eventHandler.End("depsets")
		for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
			if prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]; hasKey {
				// Two depsets collide on hash. Ensure that their contents are identical.
				if !reflect.DeepEqual(aqueryDepset, prevEntry) {
					return nil, nil, fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
				}
			} else {
				depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
				depsets = append(depsets, aqueryDepset)
			}
		}
	}

	eventHandler.Do("build_statement_sort", func() {
		// Build Statements and depsets must be sorted by their content hash to
		// preserve determinism between builds (this will result in consistent ninja file
		// output). Note they are not sorted by their original IDs nor their Bazel ordering,
		// as Bazel gives nondeterministic ordering / identifiers in aquery responses.
		sort.Slice(buildStatements, func(i, j int) bool {
			// Sort all nil statements to the end of the slice
			if buildStatements[i] == nil {
				return false
			} else if buildStatements[j] == nil {
				return true
			}
			// For build statements, compare output lists. In Bazel, each output file
			// may only have one action which generates it, so this will provide
			// a deterministic ordering.
			outputs_i := buildStatements[i].OutputPaths
			outputs_j := buildStatements[j].OutputPaths
			if len(outputs_i) != len(outputs_j) {
				return len(outputs_i) < len(outputs_j)
			}
			if len(outputs_i) == 0 {
				// No outputs for these actions, so compare commands.
				return buildStatements[i].Command < buildStatements[j].Command
			}
			// There may be multiple outputs, but the output ordering is deterministic.
			return outputs_i[0] < outputs_j[0]
		})
	})
	eventHandler.Do("depset_sort", func() {
		sort.Slice(depsets, func(i, j int) bool {
			return depsets[i].ContentHash < depsets[j].ContentHash
		})
	})
	return buildStatements, depsets, nil
}
426
// depsetContentHash derives a depset's identifier from its contents: the
// SHA256 checksum of the direct paths joined by newlines, immediately
// followed by the concatenated child depset hashes, encoded as unpadded
// URL-safe base64.
// A content-derived identifier keeps output deterministic; an integer
// identifier that depends on the order in which depsets are created would
// not.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	// Newline is a safe separator for paths, as paths cannot contain newline.
	payload := strings.Join(directPaths, "\n") + strings.Join(transitiveDepsetHashes, "")
	digest := sha256.Sum256([]byte(payload))
	return base64.RawURLEncoding.EncodeToString(digest[:])
}
440
441func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []uint32) ([]string, error) {
442	var hashes []string
443	for _, id := range inputDepsetIds {
444		dId := depsetId(id)
445		if aqueryDepset, exists := a.depsetIdToAqueryDepset[dId]; !exists {
446			if _, empty := a.emptyDepsetIds[dId]; !empty {
447				return nil, fmt.Errorf("undefined (not even empty) input depsetId %d", dId)
448			}
449		} else {
450			hashes = append(hashes, aqueryDepset.ContentHash)
451		}
452	}
453	return hashes, nil
454}
455
456func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
457	command := strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " ")
458	inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
459	if err != nil {
460		return nil, err
461	}
462	outputPaths, depfile, err := a.getOutputPaths(actionEntry)
463	if err != nil {
464		return nil, err
465	}
466
467	buildStatement := &BuildStatement{
468		Command:           command,
469		Depfile:           depfile,
470		OutputPaths:       outputPaths,
471		InputDepsetHashes: inputDepsetHashes,
472		Env:               actionEntry.EnvironmentVariables,
473		Mnemonic:          actionEntry.Mnemonic,
474	}
475	return buildStatement, nil
476}
477
478func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
479	outputPaths, depfile, err := a.getOutputPaths(actionEntry)
480	if err != nil {
481		return nil, err
482	}
483	if len(outputPaths) != 1 {
484		return nil, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
485	}
486	expandedTemplateContent := expandTemplateContent(actionEntry)
487	// The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
488	// and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
489	// change \n to space and mess up the format of Python programs.
490	// sed is used to convert \\n back to \n before saving to output file.
491	// See go/python-binary-host-mixed-build for more details.
492	command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
493		escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
494	inputDepsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
495	if err != nil {
496		return nil, err
497	}
498
499	buildStatement := &BuildStatement{
500		Command:           command,
501		Depfile:           depfile,
502		OutputPaths:       outputPaths,
503		InputDepsetHashes: inputDepsetHashes,
504		Env:               actionEntry.EnvironmentVariables,
505		Mnemonic:          actionEntry.Mnemonic,
506	}
507	return buildStatement, nil
508}
509
510func (a *aqueryArtifactHandler) fileWriteActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
511	outputPaths, _, err := a.getOutputPaths(actionEntry)
512	var depsetHashes []string
513	if err == nil {
514		depsetHashes, err = a.depsetContentHashes(actionEntry.InputDepSetIds)
515	}
516	if err != nil {
517		return nil, err
518	}
519	return &BuildStatement{
520		Depfile:           nil,
521		OutputPaths:       outputPaths,
522		Env:               actionEntry.EnvironmentVariables,
523		Mnemonic:          actionEntry.Mnemonic,
524		InputDepsetHashes: depsetHashes,
525		FileContents:      actionEntry.FileContents,
526	}, nil
527}
528
529func (a *aqueryArtifactHandler) symlinkTreeActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
530	outputPaths, _, err := a.getOutputPaths(actionEntry)
531	if err != nil {
532		return nil, err
533	}
534	inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
535	if err != nil {
536		return nil, err
537	}
538	if len(inputPaths) != 1 || len(outputPaths) != 1 {
539		return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
540	}
541	// The actual command is generated in bazelSingleton.GenerateBuildActions
542	return &BuildStatement{
543		Depfile:     nil,
544		OutputPaths: outputPaths,
545		Env:         actionEntry.EnvironmentVariables,
546		Mnemonic:    actionEntry.Mnemonic,
547		InputPaths:  inputPaths,
548	}, nil
549}
550
551func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
552	outputPaths, depfile, err := a.getOutputPaths(actionEntry)
553	if err != nil {
554		return nil, err
555	}
556
557	inputPaths, err := a.getInputPaths(actionEntry.InputDepSetIds)
558	if err != nil {
559		return nil, err
560	}
561	if len(inputPaths) != 1 || len(outputPaths) != 1 {
562		return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
563	}
564	out := outputPaths[0]
565	outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
566	out = proptools.ShellEscapeIncludingSpaces(out)
567	in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
568	// Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
569	command := fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
570	symlinkPaths := outputPaths[:]
571
572	buildStatement := &BuildStatement{
573		Command:      command,
574		Depfile:      depfile,
575		OutputPaths:  outputPaths,
576		InputPaths:   inputPaths,
577		Env:          actionEntry.EnvironmentVariables,
578		Mnemonic:     actionEntry.Mnemonic,
579		SymlinkPaths: symlinkPaths,
580	}
581	return buildStatement, nil
582}
583
584func (a *aqueryArtifactHandler) getOutputPaths(actionEntry *analysis_v2_proto.Action) (outputPaths []string, depfile *string, err error) {
585	for _, outputId := range actionEntry.OutputIds {
586		outputPath, exists := a.artifactIdToPath[artifactId(outputId)]
587		if !exists {
588			err = fmt.Errorf("undefined outputId %d", outputId)
589			return
590		}
591		ext := filepath.Ext(outputPath)
592		if ext == ".d" {
593			if depfile != nil {
594				err = fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
595				return
596			} else {
597				depfile = &outputPath
598			}
599		} else {
600			outputPaths = append(outputPaths, outputPath)
601		}
602	}
603	return
604}
605
606// expandTemplateContent substitutes the tokens in a template.
607func expandTemplateContent(actionEntry *analysis_v2_proto.Action) string {
608	replacerString := make([]string, len(actionEntry.Substitutions)*2)
609	for i, pair := range actionEntry.Substitutions {
610		value := pair.Value
611		if val, ok := templateActionOverriddenTokens[pair.Key]; ok {
612			value = val
613		}
614		replacerString[i*2] = pair.Key
615		replacerString[i*2+1] = value
616	}
617	replacer := strings.NewReplacer(replacerString...)
618	return replacer.Replace(actionEntry.TemplateContent)
619}
620
// commandLineArgumentReplacer rewrites the characters that are special inside
// the double-quoted echo argument of a single-quoted bash -c command:
// backslash, dollar sign, backquote and double quote each gain a leading
// backslash, a newline becomes the two characters `\n`, and a single quote
// becomes `'"'"'`.
var commandLineArgumentReplacer = strings.NewReplacer(
	"\\", "\\\\",
	"$", "\\$",
	"`", "\\`",
	"\"", "\\\"",
	"\n", "\\n",
	"'", "'\"'\"'",
)

// escapeCommandlineArgument returns str with commandLineArgumentReplacer
// applied to it.
func escapeCommandlineArgument(str string) string {
	escaped := commandLineArgumentReplacer.Replace(str)
	return escaped
}
634
635func (a *aqueryArtifactHandler) actionToBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
636	switch actionEntry.Mnemonic {
637	// Middleman actions are not handled like other actions; they are handled separately as a
638	// preparatory step so that their inputs may be relayed to actions depending on middleman
639	// artifacts.
640	case middlemanMnemonic:
641		return nil, nil
642	// PythonZipper is bogus action returned by aquery, ignore it (b/236198693)
643	case "PythonZipper":
644		return nil, nil
645	// Skip "Fail" actions, which are placeholder actions designed to always fail.
646	case "Fail":
647		return nil, nil
648	case "BaselineCoverage":
649		return nil, nil
650	case "Symlink", "SolibSymlink", "ExecutableSymlink":
651		return a.symlinkActionBuildStatement(actionEntry)
652	case "TemplateExpand":
653		if len(actionEntry.Arguments) < 1 {
654			return a.templateExpandActionBuildStatement(actionEntry)
655		}
656	case "FileWrite", "SourceSymlinkManifest":
657		return a.fileWriteActionBuildStatement(actionEntry)
658	case "SymlinkTree":
659		return a.symlinkTreeActionBuildStatement(actionEntry)
660	}
661
662	if len(actionEntry.Arguments) < 1 {
663		return nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
664	}
665	return a.normalActionBuildStatement(actionEntry)
666
667}
668
669func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]*analysis_v2_proto.PathFragment) (string, error) {
670	var labels []string
671	currId := id
672	// Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
673	for currId > 0 {
674		currFragment, ok := pathFragmentsMap[currId]
675		if !ok {
676			return "", fmt.Errorf("undefined path fragment id %d", currId)
677		}
678		labels = append([]string{currFragment.Label}, labels...)
679		parentId := pathFragmentId(currFragment.ParentId)
680		if currId == parentId {
681			return "", fmt.Errorf("fragment cannot refer to itself as parent %#v", currFragment)
682		}
683		currId = parentId
684	}
685	return filepath.Join(labels...), nil
686}
687