1// Copyright 2023 The Bazel Authors. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package python 16 17import ( 18 "bufio" 19 "context" 20 "encoding/json" 21 "fmt" 22 "io" 23 "log" 24 "os" 25 "os/exec" 26 "strings" 27 "sync" 28 29 "github.com/bazelbuild/rules_go/go/tools/bazel" 30 "github.com/emirpasic/gods/sets/treeset" 31 godsutils "github.com/emirpasic/gods/utils" 32) 33 34var ( 35 parserStdin io.WriteCloser 36 parserStdout io.Reader 37 parserMutex sync.Mutex 38) 39 40func startParserProcess(ctx context.Context) { 41 parseScriptRunfile, err := bazel.Runfile("python/parse") 42 if err != nil { 43 log.Printf("failed to initialize parser: %v\n", err) 44 os.Exit(1) 45 } 46 47 cmd := exec.CommandContext(ctx, parseScriptRunfile) 48 49 cmd.Stderr = os.Stderr 50 51 stdin, err := cmd.StdinPipe() 52 if err != nil { 53 log.Printf("failed to initialize parser: %v\n", err) 54 os.Exit(1) 55 } 56 parserStdin = stdin 57 58 stdout, err := cmd.StdoutPipe() 59 if err != nil { 60 log.Printf("failed to initialize parser: %v\n", err) 61 os.Exit(1) 62 } 63 parserStdout = stdout 64 65 if err := cmd.Start(); err != nil { 66 log.Printf("failed to initialize parser: %v\n", err) 67 os.Exit(1) 68 } 69 70 go func() { 71 if err := cmd.Wait(); err != nil { 72 log.Printf("failed to wait for parser: %v\n", err) 73 os.Exit(1) 74 } 75 }() 76} 77 78func shutdownParserProcess() { 79 if err := parserStdin.Close(); err != nil { 80 fmt.Fprintf(os.Stderr, "error closing parser: %v", err) 81 } 82} 83 84// python3Parser implements a parser for Python files that extracts the modules 85// as seen in the import statements. 86type python3Parser struct { 87 // The value of language.GenerateArgs.Config.RepoRoot. 88 repoRoot string 89 // The value of language.GenerateArgs.Rel. 90 relPackagePath string 91 // The function that determines if a dependency is ignored from a Gazelle 92 // directive. It's the signature of pythonconfig.Config.IgnoresDependency. 93 ignoresDependency func(dep string) bool 94} 95 96// newPython3Parser constructs a new python3Parser. 97func newPython3Parser( 98 repoRoot string, 99 relPackagePath string, 100 ignoresDependency func(dep string) bool, 101) *python3Parser { 102 return &python3Parser{ 103 repoRoot: repoRoot, 104 relPackagePath: relPackagePath, 105 ignoresDependency: ignoresDependency, 106 } 107} 108 109// parseSingle parses a single Python file and returns the extracted modules 110// from the import statements as well as the parsed comments. 111func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, error) { 112 pyFilenames := treeset.NewWith(godsutils.StringComparator) 113 pyFilenames.Add(pyFilename) 114 return p.parse(pyFilenames) 115} 116 117// parse parses multiple Python files and returns the extracted modules from 118// the import statements as well as the parsed comments. 119func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, error) { 120 parserMutex.Lock() 121 defer parserMutex.Unlock() 122 123 modules := treeset.NewWith(moduleComparator) 124 125 req := map[string]interface{}{ 126 "repo_root": p.repoRoot, 127 "rel_package_path": p.relPackagePath, 128 "filenames": pyFilenames.Values(), 129 } 130 encoder := json.NewEncoder(parserStdin) 131 if err := encoder.Encode(&req); err != nil { 132 return nil, fmt.Errorf("failed to parse: %w", err) 133 } 134 135 reader := bufio.NewReader(parserStdout) 136 data, err := reader.ReadBytes(0) 137 if err != nil { 138 return nil, fmt.Errorf("failed to parse: %w", err) 139 } 140 data = data[:len(data)-1] 141 var allRes []parserResponse 142 if err := json.Unmarshal(data, &allRes); err != nil { 143 return nil, fmt.Errorf("failed to parse: %w", err) 144 } 145 146 for _, res := range allRes { 147 annotations, err := annotationsFromComments(res.Comments) 148 if err != nil { 149 return nil, fmt.Errorf("failed to parse annotations: %w", err) 150 } 151 152 for _, m := range res.Modules { 153 // Check for ignored dependencies set via an annotation to the Python 154 // module. 155 if annotations.ignores(m.Name) || annotations.ignores(m.From) { 156 continue 157 } 158 159 // Check for ignored dependencies set via a Gazelle directive in a BUILD 160 // file. 161 if p.ignoresDependency(m.Name) || p.ignoresDependency(m.From) { 162 continue 163 } 164 165 modules.Add(m) 166 } 167 } 168 169 return modules, nil 170} 171 172// parserResponse represents a response returned by the parser.py for a given 173// parsed Python module. 174type parserResponse struct { 175 // The modules depended by the parsed module. 176 Modules []module `json:"modules"` 177 // The comments contained in the parsed module. This contains the 178 // annotations as they are comments in the Python module. 179 Comments []comment `json:"comments"` 180} 181 182// module represents a fully-qualified, dot-separated, Python module as seen on 183// the import statement, alongside the line number where it happened. 184type module struct { 185 // The fully-qualified, dot-separated, Python module name as seen on import 186 // statements. 187 Name string `json:"name"` 188 // The line number where the import happened. 189 LineNumber uint32 `json:"lineno"` 190 // The path to the module file relative to the Bazel workspace root. 191 Filepath string `json:"filepath"` 192 // If this was a from import, e.g. from foo import bar, From indicates the module 193 // from which it is imported. 194 From string `json:"from"` 195} 196 197// moduleComparator compares modules by name. 198func moduleComparator(a, b interface{}) int { 199 return godsutils.StringComparator(a.(module).Name, b.(module).Name) 200} 201 202// annotationKind represents Gazelle annotation kinds. 203type annotationKind string 204 205const ( 206 // The Gazelle annotation prefix. 207 annotationPrefix string = "gazelle:" 208 // The ignore annotation kind. E.g. '# gazelle:ignore <module_name>'. 209 annotationKindIgnore annotationKind = "ignore" 210) 211 212// comment represents a Python comment. 213type comment string 214 215// asAnnotation returns an annotation object if the comment has the 216// annotationPrefix. 217func (c *comment) asAnnotation() (*annotation, error) { 218 uncomment := strings.TrimLeft(string(*c), "# ") 219 if !strings.HasPrefix(uncomment, annotationPrefix) { 220 return nil, nil 221 } 222 withoutPrefix := strings.TrimPrefix(uncomment, annotationPrefix) 223 annotationParts := strings.SplitN(withoutPrefix, " ", 2) 224 if len(annotationParts) < 2 { 225 return nil, fmt.Errorf("`%s` requires a value", *c) 226 } 227 return &annotation{ 228 kind: annotationKind(annotationParts[0]), 229 value: annotationParts[1], 230 }, nil 231} 232 233// annotation represents a single Gazelle annotation parsed from a Python 234// comment. 235type annotation struct { 236 kind annotationKind 237 value string 238} 239 240// annotations represent the collection of all Gazelle annotations parsed out of 241// the comments of a Python module. 242type annotations struct { 243 // The parsed modules to be ignored by Gazelle. 244 ignore map[string]struct{} 245} 246 247// annotationsFromComments returns all the annotations parsed out of the 248// comments of a Python module. 249func annotationsFromComments(comments []comment) (*annotations, error) { 250 ignore := make(map[string]struct{}) 251 for _, comment := range comments { 252 annotation, err := comment.asAnnotation() 253 if err != nil { 254 return nil, err 255 } 256 if annotation != nil { 257 if annotation.kind == annotationKindIgnore { 258 modules := strings.Split(annotation.value, ",") 259 for _, m := range modules { 260 if m == "" { 261 continue 262 } 263 m = strings.TrimSpace(m) 264 ignore[m] = struct{}{} 265 } 266 } 267 } 268 } 269 return &annotations{ 270 ignore: ignore, 271 }, nil 272} 273 274// ignored returns true if the given module was ignored via the ignore 275// annotation. 276func (a *annotations) ignores(module string) bool { 277 _, ignores := a.ignore[module] 278 return ignores 279} 280