• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2023 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
# parse.py is a long-running program that communicates over STDIN and STDOUT.
# STDIN receives parse requests as JSON objects, one per line. For each request
# it writes to STDOUT a JSON array holding the parsed modules and comments of
# every file in the request, followed by a single NUL byte.
18
19import ast
20import concurrent.futures
21import json
22import os
23import sys
24from io import BytesIO
25from tokenize import COMMENT, tokenize
26
27
def parse_import_statements(content, filepath):
    """Collect the absolute imports found anywhere in a Python source string.

    Args:
        content: Python source code as text.
        filepath: Path recorded in every returned entry and passed to
            ``ast.parse`` as the filename.

    Returns:
        A list of dicts with the keys ``name``, ``lineno``, ``filepath`` and
        ``from``. ``import a`` yields name ``a`` with an empty ``from``;
        ``from a import b`` yields name ``a.b`` with ``from`` set to ``a``.
        Relative imports (``from . import x``) are not reported.

    Raises:
        SyntaxError: If ``content`` is not valid Python.
    """

    def record(name, origin, lineno):
        # Key order is kept stable because the dict is serialised as JSON.
        return {
            "name": name,
            "lineno": lineno,
            "filepath": filepath,
            "from": origin,
        }

    found = []
    for stmt in ast.walk(ast.parse(content, filename=filepath)):
        if isinstance(stmt, ast.Import):
            found.extend(
                record(alias.name, "", stmt.lineno) for alias in stmt.names
            )
            continue
        if not isinstance(stmt, ast.ImportFrom):
            continue
        if stmt.level != 0:
            # Non-zero level means a relative import; those are skipped.
            continue
        found.extend(
            record(f"{stmt.module}.{alias.name}", stmt.module, stmt.lineno)
            for alias in stmt.names
        )
    return found
51
52
def parse_comments(content):
    """Return every comment in a Python source string, in source order.

    Args:
        content: Python source code as already-decoded text.

    Returns:
        A list of comment strings, each including its leading ``#``.

    Raises:
        tokenize.TokenError: If the source ends inside an unterminated
            multi-line construct.
    """
    # Imported locally so this function does not depend on names beyond the
    # module-level COMMENT constant.
    from io import StringIO
    from tokenize import generate_tokens

    # The text is tokenized directly instead of being re-encoded to bytes.
    # The bytes-based tokenizer honours a PEP 263 coding cookie, so UTF-8
    # bytes produced from a source declaring another encoding (for example
    # latin-1) would be decoded with the wrong codec and corrupt non-ASCII
    # comments.
    tokens = generate_tokens(StringIO(content).readline)
    return [token.string for token in tokens if token.type == COMMENT]
60
61
def parse(repo_root, rel_package_path, filename):
    """Parse one Python file and return its imports and comments.

    Args:
        repo_root: Directory that ``rel_package_path`` is joined onto.
        rel_package_path: Package directory, relative to ``repo_root``.
        filename: Name of the file inside the package directory.

    Returns:
        A dict with ``modules`` (the result of ``parse_import_statements``,
        whose entries carry the path relative to ``repo_root``) and
        ``comments`` (the result of ``parse_comments``).

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file is not valid Python or declares an unknown
            encoding.
    """
    # The module-level name `tokenize` is bound to the tokenize.tokenize
    # function, so the module's open() helper is imported explicitly here.
    from tokenize import open as open_source

    rel_filepath = os.path.join(rel_package_path, filename)
    abs_filepath = os.path.join(repo_root, rel_filepath)
    # tokenize.open decodes the file the way the interpreter would: it
    # honours a BOM or PEP 263 coding cookie and otherwise assumes UTF-8,
    # instead of depending on the locale's preferred encoding.
    with open_source(abs_filepath) as file:
        content = file.read()
    # The file is closed at this point; only the in-memory text is parsed.
    # From simple benchmarks, 2 workers gave the best performance here.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        modules_future = executor.submit(
            parse_import_statements, content, rel_filepath
        )
        comments_future = executor.submit(parse_comments, content)
    # Leaving the executor context waits for both tasks; result() re-raises
    # any exception raised inside them.
    return {
        "modules": modules_future.result(),
        "comments": comments_future.result(),
    }
80
81
def main(stdin, stdout):
    """Serve parse requests read from ``stdin`` until it is exhausted.

    Each input line is a JSON object with the keys ``repo_root``,
    ``rel_package_path`` and ``filenames``. For every request, a JSON array
    with one ``parse`` result per non-empty filename (in request order) is
    written to ``stdout``, followed by a single NUL byte.

    Args:
        stdin: Iterable of request lines.
        stdout: Text stream exposing the underlying binary stream as
            ``.buffer``.

    Returns:
        None.
    """
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for request_line in stdin:
            parse_request = json.loads(request_line)
            repo_root = parse_request["repo_root"]
            rel_package_path = parse_request["rel_package_path"]
            # Drop empty names before choosing a path, so the single-file
            # fast path and the pool path treat them identically.
            filenames = [
                filename
                for filename in parse_request["filenames"]
                if filename != ""
            ]
            if len(filenames) == 1:
                # A single file is parsed in-process rather than being sent
                # to the pool.
                outputs = [parse(repo_root, rel_package_path, filenames[0])]
            else:
                futures = [
                    executor.submit(parse, repo_root, rel_package_path, filename)
                    for filename in filenames
                ]
                # Results are gathered in submission order so the response
                # is deterministic for a given request.
                outputs = [future.result() for future in futures]
            stdout.write(json.dumps(outputs))
            # Flush the text layer before writing to the binary layer so
            # the NUL terminator cannot overtake the JSON payload.
            stdout.flush()
            stdout.buffer.write(bytes([0]))
            stdout.flush()
103
104
if __name__ == "__main__":
    # sys.exit is always available, whereas the bare `exit` helper is only
    # injected by the site module and is missing under `python -S`.
    sys.exit(main(sys.stdin, sys.stdout))
107