1// Copyright 2020 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package bazel 16 17import ( 18 "encoding/json" 19 "fmt" 20 "path/filepath" 21 "regexp" 22 "strings" 23 24 "github.com/google/blueprint/proptools" 25) 26 27// artifact contains relevant portions of Bazel's aquery proto, Artifact. 28// Represents a single artifact, whether it's a source file or a derived output file. 29type artifact struct { 30 Id int 31 PathFragmentId int 32} 33 34type pathFragment struct { 35 Id int 36 Label string 37 ParentId int 38} 39 40// KeyValuePair represents Bazel's aquery proto, KeyValuePair. 41type KeyValuePair struct { 42 Key string 43 Value string 44} 45 46// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles. 47// Represents a data structure containing one or more files. Depsets in Bazel are an efficient 48// data structure for storing large numbers of file paths. 49type depSetOfFiles struct { 50 Id int 51 DirectArtifactIds []int 52 TransitiveDepSetIds []int 53} 54 55// action contains relevant portions of Bazel's aquery proto, Action. 56// Represents a single command line invocation in the Bazel build graph. 57type action struct { 58 Arguments []string 59 EnvironmentVariables []KeyValuePair 60 InputDepSetIds []int 61 Mnemonic string 62 OutputIds []int 63 TemplateContent string 64 Substitutions []KeyValuePair 65} 66 67// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer. 68// An aquery response from Bazel contains a single ActionGraphContainer proto. 69type actionGraphContainer struct { 70 Artifacts []artifact 71 Actions []action 72 DepSetOfFiles []depSetOfFiles 73 PathFragments []pathFragment 74} 75 76// BuildStatement contains information to register a build statement corresponding (one to one) 77// with a Bazel action from Bazel's action graph. 78type BuildStatement struct { 79 Command string 80 Depfile *string 81 OutputPaths []string 82 InputPaths []string 83 SymlinkPaths []string 84 Env []KeyValuePair 85 Mnemonic string 86} 87 88// A helper type for aquery processing which facilitates retrieval of path IDs from their 89// less readable Bazel structures (depset and path fragment). 90type aqueryArtifactHandler struct { 91 // Maps middleman artifact Id to input artifact depset ID. 92 // Middleman artifacts are treated as "substitute" artifacts for mixed builds. For example, 93 // if we find a middleman action which has outputs [foo, bar], and output [baz_middleman], then, 94 // for each other action which has input [baz_middleman], we add [foo, bar] to the inputs for 95 // that action instead. 96 middlemanIdToDepsetIds map[int][]int 97 // Maps depset Id to depset struct. 98 depsetIdToDepset map[int]depSetOfFiles 99 // depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening 100 // may be an expensive operation. 101 depsetIdToArtifactIdsCache map[int][]int 102 // Maps artifact Id to fully expanded path. 103 artifactIdToPath map[int]string 104} 105 106// The tokens should be substituted with the value specified here, instead of the 107// one returned in 'substitutions' of TemplateExpand action. 108var TemplateActionOverriddenTokens = map[string]string{ 109 // Uses "python3" for %python_binary% instead of the value returned by aquery 110 // which is "py3wrapper.sh". See removePy3wrapperScript. 111 "%python_binary%": "python3", 112} 113 114// This pattern matches the MANIFEST file created for a py_binary target. 115var manifestFilePattern = regexp.MustCompile(".*/.+\\.runfiles/MANIFEST$") 116 117// The file name of py3wrapper.sh, which is used by py_binary targets. 118var py3wrapperFileName = "/py3wrapper.sh" 119 120func newAqueryHandler(aqueryResult actionGraphContainer) (*aqueryArtifactHandler, error) { 121 pathFragments := map[int]pathFragment{} 122 for _, pathFragment := range aqueryResult.PathFragments { 123 pathFragments[pathFragment.Id] = pathFragment 124 } 125 126 artifactIdToPath := map[int]string{} 127 for _, artifact := range aqueryResult.Artifacts { 128 artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments) 129 if err != nil { 130 return nil, err 131 } 132 artifactIdToPath[artifact.Id] = artifactPath 133 } 134 135 depsetIdToDepset := map[int]depSetOfFiles{} 136 for _, depset := range aqueryResult.DepSetOfFiles { 137 depsetIdToDepset[depset.Id] = depset 138 } 139 140 // Do a pass through all actions to identify which artifacts are middleman artifacts. 141 middlemanIdToDepsetIds := map[int][]int{} 142 for _, actionEntry := range aqueryResult.Actions { 143 if actionEntry.Mnemonic == "Middleman" { 144 for _, outputId := range actionEntry.OutputIds { 145 middlemanIdToDepsetIds[outputId] = actionEntry.InputDepSetIds 146 } 147 } 148 } 149 return &aqueryArtifactHandler{ 150 middlemanIdToDepsetIds: middlemanIdToDepsetIds, 151 depsetIdToDepset: depsetIdToDepset, 152 depsetIdToArtifactIdsCache: map[int][]int{}, 153 artifactIdToPath: artifactIdToPath, 154 }, nil 155} 156 157func (a *aqueryArtifactHandler) getInputPaths(depsetIds []int) ([]string, error) { 158 inputPaths := []string{} 159 160 for _, inputDepSetId := range depsetIds { 161 inputArtifacts, err := a.artifactIdsFromDepsetId(inputDepSetId) 162 if err != nil { 163 return nil, err 164 } 165 for _, inputId := range inputArtifacts { 166 if middlemanInputDepsetIds, isMiddlemanArtifact := a.middlemanIdToDepsetIds[inputId]; isMiddlemanArtifact { 167 // Add all inputs from middleman actions which created middleman artifacts which are 168 // in the inputs for this action. 169 swappedInputPaths, err := a.getInputPaths(middlemanInputDepsetIds) 170 if err != nil { 171 return nil, err 172 } 173 inputPaths = append(inputPaths, swappedInputPaths...) 174 } else { 175 inputPath, exists := a.artifactIdToPath[inputId] 176 if !exists { 177 return nil, fmt.Errorf("undefined input artifactId %d", inputId) 178 } 179 inputPaths = append(inputPaths, inputPath) 180 } 181 } 182 } 183 184 // TODO(b/197135294): Clean up this custom runfiles handling logic when 185 // SourceSymlinkManifest and SymlinkTree actions are supported. 186 filteredInputPaths := filterOutPy3wrapperAndManifestFileFromInputPaths(inputPaths) 187 188 return filteredInputPaths, nil 189} 190 191// See go/python-binary-host-mixed-build for more details. 192// 1) For py3wrapper.sh, there is no action for creating py3wrapper.sh in the aquery output of 193// Bazel py_binary targets, so there is no Ninja build statements generated for creating it. 194// 2) For MANIFEST file, SourceSymlinkManifest action is in aquery output of Bazel py_binary targets, 195// but it doesn't contain sufficient information so no Ninja build statements are generated 196// for creating it. 197// So in mixed build mode, when these two are used as input of some Ninja build statement, 198// since there is no build statement to create them, they should be removed from input paths. 199func filterOutPy3wrapperAndManifestFileFromInputPaths(inputPaths []string) []string { 200 filteredInputPaths := []string{} 201 for _, path := range inputPaths { 202 if strings.HasSuffix(path, py3wrapperFileName) || manifestFilePattern.MatchString(path) { 203 continue 204 } 205 filteredInputPaths = append(filteredInputPaths, path) 206 } 207 return filteredInputPaths 208} 209 210func (a *aqueryArtifactHandler) artifactIdsFromDepsetId(depsetId int) ([]int, error) { 211 if result, exists := a.depsetIdToArtifactIdsCache[depsetId]; exists { 212 return result, nil 213 } 214 if depset, exists := a.depsetIdToDepset[depsetId]; exists { 215 result := depset.DirectArtifactIds 216 for _, childId := range depset.TransitiveDepSetIds { 217 childArtifactIds, err := a.artifactIdsFromDepsetId(childId) 218 if err != nil { 219 return nil, err 220 } 221 result = append(result, childArtifactIds...) 222 } 223 a.depsetIdToArtifactIdsCache[depsetId] = result 224 return result, nil 225 } else { 226 return nil, fmt.Errorf("undefined input depsetId %d", depsetId) 227 } 228} 229 230// AqueryBuildStatements returns an array of BuildStatements which should be registered (and output 231// to a ninja file) to correspond one-to-one with the given action graph json proto (from a bazel 232// aquery invocation). 233func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, error) { 234 buildStatements := []BuildStatement{} 235 236 var aqueryResult actionGraphContainer 237 err := json.Unmarshal(aqueryJsonProto, &aqueryResult) 238 if err != nil { 239 return nil, err 240 } 241 aqueryHandler, err := newAqueryHandler(aqueryResult) 242 if err != nil { 243 return nil, err 244 } 245 246 for _, actionEntry := range aqueryResult.Actions { 247 if shouldSkipAction(actionEntry) { 248 continue 249 } 250 outputPaths := []string{} 251 var depfile *string 252 for _, outputId := range actionEntry.OutputIds { 253 outputPath, exists := aqueryHandler.artifactIdToPath[outputId] 254 if !exists { 255 return nil, fmt.Errorf("undefined outputId %d", outputId) 256 } 257 ext := filepath.Ext(outputPath) 258 if ext == ".d" { 259 if depfile != nil { 260 return nil, fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath) 261 } else { 262 depfile = &outputPath 263 } 264 } else { 265 outputPaths = append(outputPaths, outputPath) 266 } 267 } 268 inputPaths, err := aqueryHandler.getInputPaths(actionEntry.InputDepSetIds) 269 if err != nil { 270 return nil, err 271 } 272 273 buildStatement := BuildStatement{ 274 Command: strings.Join(proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments), " "), 275 Depfile: depfile, 276 OutputPaths: outputPaths, 277 InputPaths: inputPaths, 278 Env: actionEntry.EnvironmentVariables, 279 Mnemonic: actionEntry.Mnemonic, 280 } 281 282 if isSymlinkAction(actionEntry) { 283 if len(inputPaths) != 1 || len(outputPaths) != 1 { 284 return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputPaths, outputPaths) 285 } 286 out := outputPaths[0] 287 outDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(out)) 288 out = proptools.ShellEscapeIncludingSpaces(out) 289 in := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputPaths[0])) 290 // Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`). 291 buildStatement.Command = fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", outDir, out, in) 292 buildStatement.SymlinkPaths = outputPaths[:] 293 } else if isTemplateExpandAction(actionEntry) && len(actionEntry.Arguments) < 1 { 294 if len(outputPaths) != 1 { 295 return nil, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputPaths) 296 } 297 expandedTemplateContent := expandTemplateContent(actionEntry) 298 // The expandedTemplateContent is escaped for being used in double quotes and shell unescape, 299 // and the new line characters (\n) are also changed to \\n which avoids some Ninja escape on \n, which might 300 // change \n to space and mess up the format of Python programs. 301 // sed is used to convert \\n back to \n before saving to output file. 302 // See go/python-binary-host-mixed-build for more details. 303 command := fmt.Sprintf(`/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`, 304 escapeCommandlineArgument(expandedTemplateContent), outputPaths[0]) 305 buildStatement.Command = command 306 } else if isPythonZipperAction(actionEntry) { 307 if len(inputPaths) < 1 || len(outputPaths) != 1 { 308 return nil, fmt.Errorf("Expect 1+ input and 1 output to python zipper action, got: input %q, output %q", inputPaths, outputPaths) 309 } 310 buildStatement.InputPaths, buildStatement.Command = removePy3wrapperScript(buildStatement) 311 buildStatement.Command = addCommandForPyBinaryRunfilesDir(buildStatement, inputPaths[0], outputPaths[0]) 312 // Add the python zip file as input of the corresponding python binary stub script in Ninja build statements. 313 // In Ninja build statements, the outputs of dependents of a python binary have python binary stub script as input, 314 // which is not sufficient without the python zip file from which runfiles directory is created for py_binary. 315 // 316 // The following logic relies on that Bazel aquery output returns actions in the order that 317 // PythonZipper is after TemplateAction of creating Python binary stub script. If later Bazel doesn't return actions 318 // in that order, the following logic might not find the build statement generated for Python binary 319 // stub script and the build might fail. So the check of pyBinaryFound is added to help debug in case later Bazel might change aquery output. 320 // See go/python-binary-host-mixed-build for more details. 321 pythonZipFilePath := outputPaths[0] 322 pyBinaryFound := false 323 for i, _ := range buildStatements { 324 if len(buildStatements[i].OutputPaths) == 1 && buildStatements[i].OutputPaths[0]+".zip" == pythonZipFilePath { 325 buildStatements[i].InputPaths = append(buildStatements[i].InputPaths, pythonZipFilePath) 326 pyBinaryFound = true 327 } 328 } 329 if !pyBinaryFound { 330 return nil, fmt.Errorf("Could not find the correspondinging Python binary stub script of PythonZipper: %q", outputPaths) 331 } 332 } else if len(actionEntry.Arguments) < 1 { 333 return nil, fmt.Errorf("received action with no command: [%v]", buildStatement) 334 } 335 buildStatements = append(buildStatements, buildStatement) 336 } 337 338 return buildStatements, nil 339} 340 341// expandTemplateContent substitutes the tokens in a template. 342func expandTemplateContent(actionEntry action) string { 343 replacerString := []string{} 344 for _, pair := range actionEntry.Substitutions { 345 value := pair.Value 346 if val, ok := TemplateActionOverriddenTokens[pair.Key]; ok { 347 value = val 348 } 349 replacerString = append(replacerString, pair.Key, value) 350 } 351 replacer := strings.NewReplacer(replacerString...) 352 return replacer.Replace(actionEntry.TemplateContent) 353} 354 355func escapeCommandlineArgument(str string) string { 356 // \->\\, $->\$, `->\`, "->\", \n->\\n, '->'"'"' 357 replacer := strings.NewReplacer( 358 `\`, `\\`, 359 `$`, `\$`, 360 "`", "\\`", 361 `"`, `\"`, 362 "\n", "\\n", 363 `'`, `'"'"'`, 364 ) 365 return replacer.Replace(str) 366} 367 368// removePy3wrapperScript removes py3wrapper.sh from the input paths and command of the action of 369// creating python zip file in mixed build mode. py3wrapper.sh is returned as input by aquery but 370// there is no action returned by aquery for creating it. So in mixed build "python3" is used 371// as the PYTHON_BINARY in python binary stub script, and py3wrapper.sh is not needed and should be 372// removed from input paths and command of creating python zip file. 373// See go/python-binary-host-mixed-build for more details. 374// TODO(b/205879240) remove this after py3wrapper.sh could be created in the mixed build mode. 375func removePy3wrapperScript(bs BuildStatement) (newInputPaths []string, newCommand string) { 376 // Remove from inputs 377 filteredInputPaths := []string{} 378 for _, path := range bs.InputPaths { 379 if !strings.HasSuffix(path, py3wrapperFileName) { 380 filteredInputPaths = append(filteredInputPaths, path) 381 } 382 } 383 newInputPaths = filteredInputPaths 384 385 // Remove from command line 386 var re = regexp.MustCompile(`\S*` + py3wrapperFileName) 387 newCommand = re.ReplaceAllString(bs.Command, "") 388 return 389} 390 391// addCommandForPyBinaryRunfilesDir adds commands creating python binary runfiles directory. 392// runfiles directory is created by using MANIFEST file and MANIFEST file is the output of 393// SourceSymlinkManifest action is in aquery output of Bazel py_binary targets, 394// but since SourceSymlinkManifest doesn't contain sufficient information 395// so MANIFEST file could not be created, which also blocks the creation of runfiles directory. 396// See go/python-binary-host-mixed-build for more details. 397// TODO(b/197135294) create runfiles directory from MANIFEST file once it can be created from SourceSymlinkManifest action. 398func addCommandForPyBinaryRunfilesDir(bs BuildStatement, zipperCommandPath, zipFilePath string) string { 399 // Unzip the zip file, zipFilePath looks like <python_binary>.zip 400 runfilesDirName := zipFilePath[0:len(zipFilePath)-4] + ".runfiles" 401 command := fmt.Sprintf("%s x %s -d %s", zipperCommandPath, zipFilePath, runfilesDirName) 402 // Create a symbolic link in <python_binary>.runfiles/, which is the expected structure 403 // when running the python binary stub script. 404 command += fmt.Sprintf(" && ln -sf runfiles/__main__ %s", runfilesDirName) 405 return bs.Command + " && " + command 406} 407 408func isSymlinkAction(a action) bool { 409 return a.Mnemonic == "Symlink" || a.Mnemonic == "SolibSymlink" 410} 411 412func isTemplateExpandAction(a action) bool { 413 return a.Mnemonic == "TemplateExpand" 414} 415 416func isPythonZipperAction(a action) bool { 417 return a.Mnemonic == "PythonZipper" 418} 419 420func shouldSkipAction(a action) bool { 421 // TODO(b/180945121): Handle complex symlink actions. 422 if a.Mnemonic == "SymlinkTree" || a.Mnemonic == "SourceSymlinkManifest" { 423 return true 424 } 425 // Middleman actions are not handled like other actions; they are handled separately as a 426 // preparatory step so that their inputs may be relayed to actions depending on middleman 427 // artifacts. 428 if a.Mnemonic == "Middleman" { 429 return true 430 } 431 // Skip "Fail" actions, which are placeholder actions designed to always fail. 432 if a.Mnemonic == "Fail" { 433 return true 434 } 435 // TODO(b/180946980): Handle FileWrite. The aquery proto currently contains no information 436 // about the contents that are written. 437 if a.Mnemonic == "FileWrite" { 438 return true 439 } 440 return false 441} 442 443func expandPathFragment(id int, pathFragmentsMap map[int]pathFragment) (string, error) { 444 labels := []string{} 445 currId := id 446 // Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node. 447 for currId > 0 { 448 currFragment, ok := pathFragmentsMap[currId] 449 if !ok { 450 return "", fmt.Errorf("undefined path fragment id %d", currId) 451 } 452 labels = append([]string{currFragment.Label}, labels...) 453 if currId == currFragment.ParentId { 454 return "", fmt.Errorf("Fragment cannot refer to itself as parent %#v", currFragment) 455 } 456 currId = currFragment.ParentId 457 } 458 return filepath.Join(labels...), nil 459} 460