• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
18	"encoding/json"
19	"fmt"
20	"path/filepath"
21	"regexp"
22	"strings"
23
24	"github.com/google/blueprint/proptools"
25)
26
// artifact mirrors the subset of Bazel's aquery Artifact proto that this package consumes.
// Each artifact is one file in the action graph, either a source file or a derived output.
type artifact struct {
	// Id is the identifier by which actions and depsets refer to this artifact.
	Id int
	// PathFragmentId identifies the leaf pathFragment from which the artifact's
	// full path is reconstructed.
	PathFragmentId int
}
33
// pathFragment is a single path component in the aquery output. Fragments are chained
// through ParentId; a full path is obtained by following the chain up from a leaf
// fragment and joining the labels from the outermost parent down to the leaf.
type pathFragment struct {
	// Id is the identifier other fragments and artifacts use to refer to this fragment.
	Id int
	// Label is the path component contributed by this fragment.
	Label string
	// ParentId is the Id of the enclosing fragment. Values that are not positive
	// mark the end of the chain.
	ParentId int
}
39
// KeyValuePair mirrors Bazel's aquery KeyValuePair proto. In this file it carries both
// the environment variables of an action and the substitutions of a template expansion.
type KeyValuePair struct {
	// Key is the name of the entry (a variable name or a template token).
	Key string
	// Value is the content associated with Key.
	Value string
}
45
// depSetOfFiles mirrors the subset of Bazel's aquery DepSetOfFiles proto that this package
// consumes. A depset names a collection of files: some listed directly, the rest reached
// through other depsets. Bazel uses depsets to store large numbers of file paths efficiently.
type depSetOfFiles struct {
	// Id is the identifier by which actions and other depsets refer to this depset.
	Id int
	// DirectArtifactIds lists the artifacts that belong to this depset itself.
	DirectArtifactIds []int
	// TransitiveDepSetIds lists child depsets whose artifacts are also part of this depset.
	TransitiveDepSetIds []int
}
54
55// action contains relevant portions of Bazel's aquery proto, Action.
56// Represents a single command line invocation in the Bazel build graph.
57type action struct {
58	Arguments            []string
59	EnvironmentVariables []KeyValuePair
60	InputDepSetIds       []int
61	Mnemonic             string
62	OutputIds            []int
63	TemplateContent      string
64	Substitutions        []KeyValuePair
65}
66
67// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
68// An aquery response from Bazel contains a single ActionGraphContainer proto.
69type actionGraphContainer struct {
70	Artifacts     []artifact
71	Actions       []action
72	DepSetOfFiles []depSetOfFiles
73	PathFragments []pathFragment
74}
75
76// BuildStatement contains information to register a build statement corresponding (one to one)
77// with a Bazel action from Bazel's action graph.
78type BuildStatement struct {
79	Command      string
80	Depfile      *string
81	OutputPaths  []string
82	InputPaths   []string
83	SymlinkPaths []string
84	Env          []KeyValuePair
85	Mnemonic     string
86}
87
88// A helper type for aquery processing which facilitates retrieval of path IDs from their
89// less readable Bazel structures (depset and path fragment).
90type aqueryArtifactHandler struct {
91	// Maps middleman artifact Id to input artifact depset ID.
92	// Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example,
93	// if we find a middleman action which has outputs [foo, bar], and output [baz_middleman], then,
94	// for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for
95	// that action instead.
96	middlemanIdToDepsetIds map[int][]int
97	// Maps depset Id to depset struct.
98	depsetIdToDepset map[int]depSetOfFiles
99	// depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
100	// may be an expensive operation.
101	depsetIdToArtifactIdsCache map[int][]int
102	// Maps artifact Id to fully expanded path.
103	artifactIdToPath map[int]string
104}
105
var (
	// TemplateActionOverriddenTokens lists template tokens whose replacement is taken from
	// this map rather than from the 'substitutions' reported for a TemplateExpand action.
	TemplateActionOverriddenTokens = map[string]string{
		// aquery reports "py3wrapper.sh" for %python_binary%; "python3" is used instead.
		// See removePy3wrapperScript.
		"%python_binary%": "python3",
	}

	// manifestFilePattern matches the path of the MANIFEST file created for a py_binary
	// target.
	manifestFilePattern = regexp.MustCompile(`.*/.+\.runfiles/MANIFEST$`)

	// py3wrapperFileName is the file name (with leading slash) of py3wrapper.sh, which is
	// used by py_binary targets.
	py3wrapperFileName = "/py3wrapper.sh"
)
119
120func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) {
121	pathFragments := map[int]pathFragment{}
122	for _, pathFragment := range aqueryResult.PathFragments {
123		pathFragments[pathFragment.Id] = pathFragment
124	}
125
126	artifactIdToPath := map[int]string{}
127	for _, artifact := range aqueryResult.Artifacts {
128		artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
129		if err != nil {
130			return nil, err
131		}
132		artifactIdToPath[artifact.Id] = artifactPath
133	}
134
135	depsetIdToDepset := map[int]depSetOfFiles{}
136	for _, depset := range aqueryResult.DepSetOfFiles {
137		depsetIdToDepset[depset.Id] = depset
138	}
139
140	// Do a pass through all actions to identify which artifacts are middleman artifacts.
141	middlemanIdToDepsetIds := map[int][]int{}
142	for _, actionEntry := range aqueryResult.Actions {
143		if actionEntry.Mnemonic == "Middleman" {
144			for _, outputId := range actionEntry.OutputIds {
145				middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds
146			}
147		}
148	}
149	return &aqueryArtifactHandler{
150		middlemanIdToDepsetIds:     middlemanIdToDepsetIds,
151		depsetIdToDepset:           depsetIdToDepset,
152		depsetIdToArtifactIdsCache: map[int][]int{},
153		artifactIdToPath:           artifactIdToPath,
154	}, nil
155}
156
157func (a *aqueryArtifactHandler) getInputPaths(depsetIds []int) ([]string, error) {
158	inputPaths := []string{}
159
160	for _, inputDepSetId := range depsetIds {
161		inputArtifacts, err := a.artifactIdsFromDepsetId(inputDepSetId)
162		if err != nil {
163			return nil, err
164		}
165		for _, inputId := range inputArtifacts {
166			if middlemanInputDepsetIds, isMiddlemanArtifact := a.middlemanIdToDepsetIds[inputId]; isMiddlemanArtifact {
167				// Add all inputs from middleman actions which created middleman artifacts which are
168				// in the inputs for this action.
169				swappedInputPaths, err := a.getInputPaths(middlemanInputDepsetIds)
170				if err != nil {
171					return nil, err
172				}
173				inputPaths = append(inputPaths, swappedInputPaths...)
174			} else {
175				inputPath, exists := a.artifactIdToPath[inputId]
176				if !exists {
177					return nil, fmt.Errorf("undefined input artifactId %d", inputId)
178				}
179				inputPaths = append(inputPaths, inputPath)
180			}
181		}
182	}
183
184	// TODO(b/197135294): Clean up this custom runfiles handling logic when
185	// SourceSymlinkManifest and SymlinkTree actions are supported.
186	filteredInputPaths := filterOutPy3wrapperAndManifestFileFromInputPaths(inputPaths)
187
188	return filteredInputPaths, nil
189}
190
191// See go/python-binary-host-mixed-build for more details.
192// 1) For py3wrapper.sh, there is no action for creating py3wrapper.sh in the aquery output of
193// Bazel py_binary targets, so there is no Ninja build statements generated for creating it.
194// 2) For MANIFEST file, SourceSymlinkManifest action is in aquery output of Bazel py_binary targets,
195// but it doesn't contain sufficient information so no Ninja build statements are generated
196// for creating it.
197// So in mixed build mode, when these two are used as input of some Ninja build statement,
198// since there is no build statement to create them, they should be removed from input paths.
199func filterOutPy3wrapperAndManifestFileFromInputPaths(inputPaths []string) []string {
200	filteredInputPaths := []string{}
201	for _, path := range inputPaths {
202		if strings.HasSuffix(path, py3wrapperFileName) || manifestFilePattern.MatchString(path) {
203			continue
204		}
205		filteredInputPaths = append(filteredInputPaths, path)
206	}
207	return filteredInputPaths
208}
209
210func (a *aqueryArtifactHandler) artifactIdsFromDepsetId(depsetId int) ([]int, error) {
211	if result, exists := a.depsetIdToArtifactIdsCache[depsetId]; exists {
212		return result, nil
213	}
214	if depset, exists := a.depsetIdToDepset[depsetId]; exists {
215		result := depset.DirectArtifactIds
216		for _, childId := range depset.TransitiveDepSetIds {
217			childArtifactIds, err := a.artifactIdsFromDepsetId(childId)
218			if err != nil {
219				return nil, err
220			}
221			result = append(result, childArtifactIds...)
222		}
223		a.depsetIdToArtifactIdsCache[depsetId] = result
224		return result, nil
225	} else {
226		return nil, fmt.Errorf("undefined input depsetId %d", depsetId)
227	}
228}
229
230// AqueryBuildStatements returns an array of BuildStatements which should be registered (and output
231// to a ninja file) to correspond one-to-one with the given action graph json proto (from a bazel
232// aquery invocation).
233func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, error) {
234	buildStatements := []BuildStatement{}
235
236	var aqueryResult actionGraphContainer
237	err := json.Unmarshal(aqueryJsonProto, &aqueryResult)
238	if err != nil {
239		return nil, err
240	}
241	aqueryHandler, err := newAqueryHandler(aqueryResult)
242	if err != nil {
243		return nil, err
244	}
245
246	for _, actionEntry := range aqueryResult.Actions {
247		if shouldSkipAction(actionEntry) {
248			continue
249		}
250		outputPaths := []string{}
251		var depfile *string
252		for _, outputId := range actionEntry.OutputIds {
253			outputPath, exists := aqueryHandler.artifactIdToPath[outputId]
254			if !exists {
255				return nil, fmt.Errorf("undefined outputId %d", outputId)
256			}
257			ext := filepath.Ext(outputPath)
258			if ext == ".d" {
259				if depfile != nil {
260					return nil, fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
261				} else {
262					depfile = &outputPath
263				}
264			} else {
265				outputPaths = append(outputPaths, outputPath)
266			}
267		}
268		inputPaths, err := aqueryHandler.getInputPaths(actionEntry.InputDepSetIds)
269		if err != nil {
270			return nil, err
271		}
272
273		buildStatement := BuildStatement{
274			Command:     strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " "),
275			Depfile:     depfile,
276			OutputPaths: outputPaths,
277			InputPaths:  inputPaths,
278			Env:         actionEntry.EnvironmentVariables,
279			Mnemonic:    actionEntry.Mnemonic,
280		}
281
282		if isSymlinkAction(actionEntry) {
283			if len(inputPaths) != 1 || len(outputPaths) != 1 {
284				return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths)
285			}
286			out := outputPaths[0]
287			outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out))
288			out = proptools.ShellEscapeIncludingSpaces(out)
289			in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0]))
290			// Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
291			buildStatement.Command = fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in)
292			buildStatement.SymlinkPaths = outputPaths[:]
293		} else if isTemplateExpandAction(actionEntry) && len(actionEntry.Arguments) < 1 {
294			if len(outputPaths) != 1 {
295				return nil, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths)
296			}
297			expandedTemplateContent := expandTemplateContent(actionEntry)
298			// The expandedTemplateContent is escaped for being used in double quotes and shell unescape,
299			// and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might
300			// change \n to space and mess up the format of Python programs.
301			// sed is used to convert \\n back to \n before saving to output file.
302			// See go/python-binary-host-mixed-build for more details.
303			command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`,
304				escapeCommandlineArgument(expandedTemplateContent), outputPaths[0])
305			buildStatement.Command = command
306		} else if isPythonZipperAction(actionEntry) {
307			if len(inputPaths) < 1 || len(outputPaths) != 1 {
308				return nil, fmt.Errorf("Expect 1+ input and 1 output to python zipper action, got: input %q, output %q", inputPaths, outputPaths)
309			}
310			buildStatement.InputPaths, buildStatement.Command = removePy3wrapperScript(buildStatement)
311			buildStatement.Command = addCommandForPyBinaryRunfilesDir(buildStatement, inputPaths[0], outputPaths[0])
312			// Add the python zip file as input of the corresponding python binary stub script in Ninja build statements.
313			// In Ninja build statements, the outputs of dependents of a python binary have python binary stub script as input,
314			// which is not sufficient without the python zip file from which runfiles directory is created for py_binary.
315			//
316			// The following logic relies on that Bazel aquery output returns actions in the order that
317			// PythonZipper is after TemplateAction of creating Python binary stub script. If later Bazel doesn't return actions
318			// in that order, the following logic might not find the build statement generated for Python binary
319			// stub script and the build might fail. So the check of pyBinaryFound is added to help debug in case later Bazel might change aquery output.
320			// See go/python-binary-host-mixed-build for more details.
321			pythonZipFilePath := outputPaths[0]
322			pyBinaryFound := false
323			for i, _ := range buildStatements {
324				if len(buildStatements[i].OutputPaths) == 1 && buildStatements[i].OutputPaths[0]+".zip" == pythonZipFilePath {
325					buildStatements[i].InputPaths = append(buildStatements[i].InputPaths, pythonZipFilePath)
326					pyBinaryFound = true
327				}
328			}
329			if !pyBinaryFound {
330				return nil, fmt.Errorf("Could not find the correspondinging Python binary stub script of PythonZipper: %q", outputPaths)
331			}
332		} else if len(actionEntry.Arguments) < 1 {
333			return nil, fmt.Errorf("received action with no command: [%v]", buildStatement)
334		}
335		buildStatements = append(buildStatements, buildStatement)
336	}
337
338	return buildStatements, nil
339}
340
341// expandTemplateContent substitutes the tokens in a template.
342func expandTemplateContent(actionEntry action) string {
343	replacerString := []string{}
344	for _, pair := range actionEntry.Substitutions {
345		value := pair.Value
346		if val, ok := TemplateActionOverriddenTokens[pair.Key]; ok {
347			value = val
348		}
349		replacerString = append(replacerString, pair.Key, value)
350	}
351	replacer := strings.NewReplacer(replacerString...)
352	return replacer.Replace(actionEntry.TemplateContent)
353}
354
// escapeCommandlineArgument escapes str in a single pass using the following mapping:
//
//	\  -> \\
//	$  -> \$
//	`  -> \`
//	"  -> \"
//	newline -> \n (a backslash followed by the letter n)
//	'  -> '"'"'
//
// All other bytes are copied unchanged.
func escapeCommandlineArgument(str string) string {
	var escaped strings.Builder
	escaped.Grow(len(str))
	// Every character that needs escaping is a single ASCII byte, so the string can be
	// processed byte by byte without disturbing multi-byte UTF-8 sequences.
	for i := 0; i < len(str); i++ {
		switch c := str[i]; c {
		case '\\':
			escaped.WriteString(`\\`)
		case '$':
			escaped.WriteString(`\$`)
		case '`':
			escaped.WriteString("\\`")
		case '"':
			escaped.WriteString(`\"`)
		case '\n':
			escaped.WriteString(`\n`)
		case '\'':
			escaped.WriteString(`'"'"'`)
		default:
			escaped.WriteByte(c)
		}
	}
	return escaped.String()
}
367
368// removePy3wrapperScript removes py3wrapper.sh from the input paths and command of the action of
369// creating python zip file in mixed build mode. py3wrapper.sh is returned as input by aquery but
370// there is no action returned by aquery for creating it. So in mixed build "python3" is used
371// as the PYTHON_BINARY in python binary stub script, and py3wrapper.sh is not needed and should be
372// removed from input paths and command of creating python zip file.
373// See go/python-binary-host-mixed-build for more details.
374// TODO(b/205879240) remove this after py3wrapper.sh could be created in the mixed build mode.
375func removePy3wrapperScript(bs BuildStatement) (newInputPaths []string, newCommand string) {
376	// Remove from inputs
377	filteredInputPaths := []string{}
378	for _, path := range bs.InputPaths {
379		if !strings.HasSuffix(path, py3wrapperFileName) {
380			filteredInputPaths = append(filteredInputPaths, path)
381		}
382	}
383	newInputPaths = filteredInputPaths
384
385	// Remove from command line
386	var re = regexp.MustCompile(`\S*` + py3wrapperFileName)
387	newCommand = re.ReplaceAllString(bs.Command, "")
388	return
389}
390
391// addCommandForPyBinaryRunfilesDir adds commands creating python binary runfiles directory.
392// runfiles directory is created by using MANIFEST file and MANIFEST file is the output of
393// SourceSymlinkManifest action is in aquery output of Bazel py_binary targets,
394// but since SourceSymlinkManifest doesn't contain sufficient information
395// so MANIFEST file could not be created, which also blocks the creation of runfiles directory.
396// See go/python-binary-host-mixed-build for more details.
397// TODO(b/197135294) create runfiles directory from MANIFEST file once it can be created from SourceSymlinkManifest action.
398func addCommandForPyBinaryRunfilesDir(bs BuildStatement, zipperCommandPath, zipFilePath string) string {
399	// Unzip the zip file, zipFilePath looks like <python_binary>.zip
400	runfilesDirName := zipFilePath[0:len(zipFilePath)-4] + ".runfiles"
401	command := fmt.Sprintf("%s x %s -d %s", zipperCommandPath, zipFilePath, runfilesDirName)
402	// Create a symbolic link in <python_binary>.runfiles/, which is the expected structure
403	// when running the python binary stub script.
404	command += fmt.Sprintf(" && ln -sf runfiles/__main__ %s", runfilesDirName)
405	return bs.Command + " && " + command
406}
407
408func isSymlinkAction(a action) bool {
409	return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink"
410}
411
412func isTemplateExpandAction(a action) bool {
413	return a.Mnemonic == "TemplateExpand"
414}
415
416func isPythonZipperAction(a action) bool {
417	return a.Mnemonic == "PythonZipper"
418}
419
420func shouldSkipAction(a action) bool {
421	// TODO(b/180945121): Handle complex symlink actions.
422	if a.Mnemonic == "SymlinkTree" || a.Mnemonic == "SourceSymlinkManifest" {
423		return true
424	}
425	// Middleman actions are not handled like other actions; they are handled separately as a
426	// preparatory step so that their inputs may be relayed to actions depending on middleman
427	// artifacts.
428	if a.Mnemonic == "Middleman" {
429		return true
430	}
431	// Skip "Fail" actions, which are placeholder actions designed to always fail.
432	if a.Mnemonic == "Fail" {
433		return true
434	}
435	// TODO(b/180946980): Handle FileWrite. The aquery proto currently contains no information
436	// about the contents that are written.
437	if a.Mnemonic == "FileWrite" {
438		return true
439	}
440	return false
441}
442
443func expandPathFragment(id int, pathFragmentsMap map[int]pathFragment) (string, error) {
444	labels := []string{}
445	currId := id
446	// Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
447	for currId > 0 {
448		currFragment, ok := pathFragmentsMap[currId]
449		if !ok {
450			return "", fmt.Errorf("undefined path fragment id %d", currId)
451		}
452		labels = append([]string{currFragment.Label}, labels...)
453		if currId == currFragment.ParentId {
454			return "", fmt.Errorf("Fragment cannot refer to itself as parent %#v", currFragment)
455		}
456		currId = currFragment.ParentId
457	}
458	return filepath.Join(labels...), nil
459}
460