# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# parse.py is a long-living program that communicates over STDIN and STDOUT.
# STDIN receives parse requests, one per line. It outputs the parsed modules and
# comments from all the files from each request.

import ast
import concurrent.futures
import json
import os
import sys
from io import BytesIO
from tokenize import COMMENT, tokenize


def parse_import_statements(content, filepath):
    """Extract all absolute import statements from Python source text.

    Args:
        content: Python source code to parse.
        filepath: path recorded in each result and used in syntax-error
            messages produced by ``ast.parse``.

    Returns:
        A list of dicts, one per imported name, with keys "name" (dotted
        module path), "lineno", "filepath" and "from" (the ``from X``
        module, or "" for plain ``import`` statements).

    Raises:
        SyntaxError: if *content* is not valid Python.
    """
    modules = []
    tree = ast.parse(content, filename=filepath)
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                modules.append(
                    {
                        "name": alias.name,
                        "lineno": node.lineno,
                        "filepath": filepath,
                        "from": "",
                    }
                )
        # level == 0 restricts this to absolute imports; relative imports
        # ("from . import x") have no resolvable top-level module name.
        elif isinstance(node, ast.ImportFrom) and node.level == 0:
            for alias in node.names:
                modules.append(
                    {
                        "name": f"{node.module}.{alias.name}",
                        "lineno": node.lineno,
                        "filepath": filepath,
                        "from": node.module,
                    }
                )
    return modules


def parse_comments(content):
    """Return every "#" comment token found in Python source text *content*."""
    comments = []
    tokens = tokenize(BytesIO(content.encode("utf-8")).readline)
    for toknum, tokval, _, _, _ in tokens:
        if toknum == COMMENT:
            comments.append(tokval)
    return comments


def parse(repo_root, rel_package_path, filename):
    """Parse one Python file and collect its imports and comments.

    Args:
        repo_root: absolute path of the repository root.
        rel_package_path: package directory path, relative to *repo_root*.
        filename: file name within the package directory.

    Returns:
        A dict {"modules": [...], "comments": [...]} as produced by
        parse_import_statements and parse_comments.
    """
    rel_filepath = os.path.join(rel_package_path, filename)
    abs_filepath = os.path.join(repo_root, rel_filepath)
    # Python source defaults to UTF-8 (PEP 3120); be explicit instead of
    # depending on the platform's locale encoding.
    with open(abs_filepath, "r", encoding="utf-8") as file:
        content = file.read()
    # From simple benchmarks, 2 workers gave the best performance here.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        modules_future = executor.submit(
            parse_import_statements, content, rel_filepath
        )
        comments_future = executor.submit(parse_comments, content)
        modules = modules_future.result()
        comments = comments_future.result()
    output = {
        "modules": modules,
        "comments": comments,
    }
    return output


def main(stdin, stdout):
    """Serve parse requests read from *stdin*, one JSON object per line.

    Each response is a JSON array of per-file results written to *stdout*,
    terminated by a single NUL byte so the reader can find the message
    boundary.
    """
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for parse_request in stdin:
            parse_request = json.loads(parse_request)
            repo_root = parse_request["repo_root"]
            rel_package_path = parse_request["rel_package_path"]
            filenames = parse_request["filenames"]
            outputs = []
            if len(filenames) == 1:
                # Skip the process-pool overhead for the common
                # single-file request.
                outputs.append(parse(repo_root, rel_package_path, filenames[0]))
            else:
                futures = [
                    executor.submit(parse, repo_root, rel_package_path, filename)
                    for filename in filenames
                    if filename != ""
                ]
                for future in concurrent.futures.as_completed(futures):
                    outputs.append(future.result())
            # flush=True guarantees the JSON text reaches the underlying
            # buffer before the NUL delimiter written below.
            print(json.dumps(outputs), end="", file=stdout, flush=True)
            stdout.buffer.write(bytes([0]))
            stdout.flush()


if __name__ == "__main__":
    # sys.exit, not the site-installed exit() helper, so the exit status is
    # propagated reliably (exit() may be absent, e.g. under "python -S").
    sys.exit(main(sys.stdin, sys.stdout))