• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2023 The Bazel Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package python
16
17import (
18	"context"
19	"fmt"
20	"log"
21	"os"
22	"path/filepath"
23	"strings"
24
25	sitter "github.com/smacker/go-tree-sitter"
26	"github.com/smacker/go-tree-sitter/python"
27)
28
// Node type names that this file compares against sitter.Node.Type() when walking
// a tree parsed with the Python grammar.
const (
	// Nodes that make up import statements.
	sitterNodeTypeImportStatement     = "import_statement"
	sitterNodeTypeImportFromStatement = "import_from_statement"
	sitterNodeTypeAliasedImport       = "aliased_import"
	sitterNodeTypeWildcardImport      = "wildcard_import"
	sitterNodeTypeDottedName          = "dotted_name"

	// Nodes inspected when looking for the `if __name__ == "__main__":` idiom.
	sitterNodeTypeIfStatement        = "if_statement"
	sitterNodeTypeComparisonOperator = "comparison_operator"
	sitterNodeTypeIdentifier         = "identifier"
	sitterNodeTypeString             = "string"

	// Source comments.
	sitterNodeTypeComment = "comment"
)
41
// ParserOutput is the result a FileParser accumulates while walking one Python file.
type ParserOutput struct {
	FileName string    // filename as passed to SetCodeAndFile (no package path)
	Modules  []module  // one entry per import recorded by parseImportStatements
	Comments []comment // text of each comment node recorded by parseComments
	HasMain  bool      // result of parseMain: an `if __name__ == "__main__":` block was found
}
48
// FileParser extracts imports, comments and the presence of a main block from Python
// source. Populate it with SetCodeAndFile (or use ParseFile) before calling Parse.
type FileParser struct {
	code        []byte       // raw Python source being parsed
	relFilepath string       // filepath.Join(relPackagePath, filename); recorded on each module
	output      ParserOutput // accumulated result; Parse returns a pointer to this field
}
54
55func NewFileParser() *FileParser {
56	return &FileParser{}
57}
58
59// ParseCode instantiates a new tree-sitter Parser and parses the python code, returning
60// the tree-sitter RootNode.
61// It prints a warning if parsing fails.
62func ParseCode(code []byte, path string) (*sitter.Node, error) {
63	parser := sitter.NewParser()
64	parser.SetLanguage(python.GetLanguage())
65
66	tree, err := parser.ParseCtx(context.Background(), nil, code)
67	if err != nil {
68		return nil, err
69	}
70
71	root := tree.RootNode()
72	if !root.HasError() {
73		return root, nil
74	}
75
76	log.Printf("WARNING: failed to parse %q. The resulting BUILD target may be incorrect.", path)
77
78	// Note: we intentionally do not return an error even when root.HasError because the parse
79	// failure may be in some part of the code that Gazelle doesn't care about.
80	verbose, envExists := os.LookupEnv("RULES_PYTHON_GAZELLE_VERBOSE")
81	if !envExists || verbose != "1" {
82		return root, nil
83	}
84
85	for i := 0; i < int(root.ChildCount()); i++ {
86		child := root.Child(i)
87		if child.IsError() {
88			// Example logs:
89			// gazelle: Parse error at {Row:1 Column:0}:
90			// def search_one_more_level[T]():
91			log.Printf("Parse error at %+v:\n%+v", child.StartPoint(), child.Content(code))
92			// Log the internal tree-sitter representation of what was parsed. Eg:
93			// gazelle: The above was parsed as: (ERROR (identifier) (call function: (list (identifier)) arguments: (argument_list)))
94			log.Printf("The above was parsed as: %v", child.String())
95		}
96	}
97
98	return root, nil
99}
100
101// parseMain returns true if the python file has an `if __name__ == "__main__":` block,
102// which is a common idiom for python scripts/binaries.
103func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool {
104	for i := 0; i < int(node.ChildCount()); i++ {
105		if err := ctx.Err(); err != nil {
106			return false
107		}
108		child := node.Child(i)
109		if child.Type() == sitterNodeTypeIfStatement &&
110			child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" {
111			statement := child.Child(1)
112			a, b := statement.Child(0), statement.Child(2)
113			// convert "'__main__' == __name__" to "__name__ == '__main__'"
114			if b.Type() == sitterNodeTypeIdentifier {
115				a, b = b, a
116			}
117			if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" &&
118				// at github.com/smacker/go-tree-sitter@latest (after v0.0.0-20240422154435-0628b34cbf9c we used)
119				// "__main__" is the second child of b. But now, it isn't.
120				// we cannot use the latest go-tree-sitter because of the top level reference in scanner.c.
121				// https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1
122				b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" {
123				return true
124			}
125		}
126	}
127	return false
128}
129
130// parseImportStatement parses a node for an import statement, returning a `module` and a boolean
131// representing if the parse was OK or not.
132func parseImportStatement(node *sitter.Node, code []byte) (module, bool) {
133	switch node.Type() {
134	case sitterNodeTypeDottedName:
135		return module{
136			Name:       node.Content(code),
137			LineNumber: node.StartPoint().Row + 1,
138		}, true
139	case sitterNodeTypeAliasedImport:
140		return parseImportStatement(node.Child(0), code)
141	case sitterNodeTypeWildcardImport:
142		return module{
143			Name:       "*",
144			LineNumber: node.StartPoint().Row + 1,
145		}, true
146	}
147	return module{}, false
148}
149
150// parseImportStatements parses a node for import statements, returning true if the node is
151// an import statement. It updates FileParser.output.Modules with the `module` that the
152// import represents.
153func (p *FileParser) parseImportStatements(node *sitter.Node) bool {
154	if node.Type() == sitterNodeTypeImportStatement {
155		for j := 1; j < int(node.ChildCount()); j++ {
156			m, ok := parseImportStatement(node.Child(j), p.code)
157			if !ok {
158				continue
159			}
160			m.Filepath = p.relFilepath
161			if strings.HasPrefix(m.Name, ".") {
162				continue
163			}
164			p.output.Modules = append(p.output.Modules, m)
165		}
166	} else if node.Type() == sitterNodeTypeImportFromStatement {
167		from := node.Child(1).Content(p.code)
168		if strings.HasPrefix(from, ".") {
169			return true
170		}
171		for j := 3; j < int(node.ChildCount()); j++ {
172			m, ok := parseImportStatement(node.Child(j), p.code)
173			if !ok {
174				continue
175			}
176			m.Filepath = p.relFilepath
177			m.From = from
178			m.Name = fmt.Sprintf("%s.%s", from, m.Name)
179			p.output.Modules = append(p.output.Modules, m)
180		}
181	} else {
182		return false
183	}
184	return true
185}
186
187// parseComments parses a node for comments, returning true if the node is a comment.
188// It updates FileParser.output.Comments with the parsed comment.
189func (p *FileParser) parseComments(node *sitter.Node) bool {
190	if node.Type() == sitterNodeTypeComment {
191		p.output.Comments = append(p.output.Comments, comment(node.Content(p.code)))
192		return true
193	}
194	return false
195}
196
197func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) {
198	p.code = code
199	p.relFilepath = filepath.Join(relPackagePath, filename)
200	p.output.FileName = filename
201}
202
203func (p *FileParser) parse(ctx context.Context, node *sitter.Node) {
204	if node == nil {
205		return
206	}
207	for i := 0; i < int(node.ChildCount()); i++ {
208		if err := ctx.Err(); err != nil {
209			return
210		}
211		child := node.Child(i)
212		if p.parseImportStatements(child) {
213			continue
214		}
215		if p.parseComments(child) {
216			continue
217		}
218		p.parse(ctx, child)
219	}
220}
221
222func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) {
223	rootNode, err := ParseCode(p.code, p.relFilepath)
224	if err != nil {
225		return nil, err
226	}
227
228	p.output.HasMain = p.parseMain(ctx, rootNode)
229
230	p.parse(ctx, rootNode)
231	return &p.output, nil
232}
233
234func (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) {
235	code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename))
236	if err != nil {
237		return nil, err
238	}
239	p.SetCodeAndFile(code, relPackagePath, filename)
240	return p.Parse(ctx)
241}
242