#!/usr/bin/env python3.8

"""Parse every Python file under a directory with pegen and report the results.

For each ``*.py`` file found (recursively) the script parses or compiles the
source through ``_peg_parser``, prints a per-file status line, and optionally
compares the resulting AST against the one produced with ``oldparser=True``.
"""

import argparse
import ast
import os
import sys
import time
import tokenize
import _peg_parser
from glob import glob, escape
from pathlib import PurePath

from typing import List, Optional, Any, Tuple

# The project-local imports below are resolved relative to the current
# working directory, so it has to be on sys.path before they run.
sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
from pegen.testutil import print_memstats
from scripts import show_parse

# ANSI escape sequences used to colour the per-file status line.
SUCCESS = "\033[92m"
FAIL = "\033[91m"
ENDC = "\033[0m"

# Values accepted for the ``mode`` argument of parse_file()/parse_directory().
COMPILE = 2  # use _peg_parser.compile_string()
PARSE = 1  # use _peg_parser.parse_string(..., ast=True)
NOTREE = 0  # use _peg_parser.parse_string(..., ast=False)

argparser = argparse.ArgumentParser(
    prog="test_parse_directory",
    description="Helper program to test directories or files for pegen",
)
argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
argparser.add_argument(
    "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
)
argparser.add_argument(
    "-s", "--short", action="store_true", help="Only show errors, in a more Emacs-friendly format"
)
argparser.add_argument(
    "-v", "--verbose", action="store_true", help="Display detailed errors for failures"
)
argparser.add_argument(
    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)


def report_status(
    succeeded: bool,
    file: str,
    verbose: bool,
    error: Optional[Exception] = None,
    short: bool = False,
) -> None:
    """Print the outcome of processing a single file.

    In short mode successes are silent and failures are printed as
    ``file:line:col: message``; otherwise a coloured ``OK``/``Fail`` line is
    printed. When *verbose* is set and an *error* is given, the exception
    class name and text are printed on an extra line.
    """
    if short and succeeded:
        return

    if succeeded:
        status = "OK"
        COLOR = SUCCESS
    else:
        status = "Fail"
        COLOR = FAIL

    if short:
        lineno = 0
        offset = 0
        if isinstance(error, SyntaxError):
            # Fall back to 1 when the exception carries no position.
            lineno = error.lineno or 1
            offset = error.offset or 1
            message = error.args[0]
        else:
            message = f"{error.__class__.__name__}: {error}"
        print(f"{file}:{lineno}:{offset}: {message}")
    else:
        print(f"{COLOR}{file:60} {status}{ENDC}")

        if error and verbose:
            print(f" {type(error).__name__}: {error}")


def compare_trees(
    actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
) -> int:
    """Compare *actual_tree* with the tree produced for *file* by the old parser.

    The reference tree is obtained by re-reading *file* and parsing it with
    ``oldparser=True``. Returns 0 when the dumps of both trees are equal and
    1 otherwise; in the latter case a diff is printed.
    """
    # tokenize.open() honours the source encoding declaration / BOM, the same
    # way parse_directory() reads the file before parsing it.
    with tokenize.open(file) as f:
        expected_tree = _peg_parser.parse_string(f.read(), oldparser=True)

    expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
    actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
    if actual_text == expected_text:
        if verbose:
            print(f"Tree for {file}:")
            print(show_parse.format_tree(actual_tree, include_attributes))
        return 0

    print(f"Diffing ASTs for {file} ...")

    expected = show_parse.format_tree(expected_tree, include_attributes)
    actual = show_parse.format_tree(actual_tree, include_attributes)

    if verbose:
        print(f"Expected for {file}:")
        print(expected)
        print(f"Actual for {file}:")
        print(actual)
        print(f"Diff for {file}:")

    diff = show_parse.diff_trees(expected_tree, actual_tree, include_attributes)
    for line in diff:
        print(line)

    return 1


def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
    """Parse or compile *source* and time the call.

    With ``mode == COMPILE`` the source goes through
    ``_peg_parser.compile_string``; otherwise through
    ``_peg_parser.parse_string`` with ``ast`` set only when ``mode == PARSE``.

    Returns a ``(result, elapsed_seconds)`` tuple. Exceptions raised by
    ``_peg_parser`` propagate to the caller.
    """
    t0 = time.time()
    if mode == COMPILE:
        result = _peg_parser.compile_string(
            source,
            filename=file,
            oldparser=oldparser,
        )
    else:
        result = _peg_parser.parse_string(
            source,
            filename=file,
            oldparser=oldparser,
            ast=(mode == PARSE),
        )
    t1 = time.time()
    return result, t1 - t0


def is_parsing_failure(source: str) -> bool:
    """Return True if the old parser accepts *source*.

    This is meant to be called after the parser under test has rejected
    *source*: True means only that parser failed (a genuine failure), False
    means the old parser raises ``SyntaxError`` as well.
    """
    try:
        _peg_parser.parse_string(source, mode="exec", oldparser=True)
    except SyntaxError:
        return False
    return True


def generate_time_stats(files: List[str], total_seconds: float) -> None:
    """Print totals (files, lines, bytes) and throughput for *files*.

    Every file is re-read in binary mode to count its lines and bytes. The
    throughput line is skipped when *total_seconds* is not positive.
    """
    total_files = len(files)
    total_bytes = 0
    total_lines = 0
    for file in files:
        # Count lines and bytes separately
        with open(file, "rb") as f:
            total_lines += sum(1 for _ in f)
            # After iterating to EOF the position equals the file size.
            total_bytes += f.tell()

    print(
        f"Checked {total_files:,} files, {total_lines:,} lines,",
        f"{total_bytes:,} bytes in {total_seconds:,.3f} seconds.",
    )
    if total_seconds > 0:
        print(
            f"That's {total_lines / total_seconds :,.0f} lines/sec,",
            f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
        )


def parse_directory(
    directory: str,
    verbose: bool,
    excluded_files: List[str],
    tree_arg: int,
    short: bool,
    mode: int,
    oldparser: bool,
) -> int:
    """Parse every ``*.py`` file under *directory* and report the outcome.

    Files whose path matches any pattern in *excluded_files* are skipped.
    When *tree_arg* is non-zero the resulting trees are afterwards compared
    via compare_trees(); a value of 2 or more also compares node attributes.

    Returns 1 if any file failed to parse or any tree comparison failed
    (or if the arguments are inconsistent), 0 otherwise.
    """
    if tree_arg:
        assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"

    if oldparser and tree_arg:
        print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
        return 1

    # For a given directory, traverse files and attempt to parse each one
    # - Output success/failure for each file
    errors = 0
    files = []
    trees = {}  # Trees to compare (after everything else is done)
    total_seconds = 0.0

    for file in sorted(glob(os.path.join(escape(directory), "**/*.py"), recursive=True)):
        # Only attempt to parse Python files and files that are not excluded
        if any(PurePath(file).match(pattern) for pattern in excluded_files):
            continue

        with tokenize.open(file) as f:
            source = f.read()

        try:
            result, dt = parse_file(source, file, mode, oldparser)
        except SyntaxError as error:
            if is_parsing_failure(source):
                # The old parser accepts the file, so this is a real failure
                # of the parser under test.
                report_status(
                    succeeded=False, file=file, verbose=verbose, error=error, short=short
                )
                errors += 1
            else:
                # Both parsers reject the file: not counted as an error.
                print(f"File {file} cannot be parsed by either parser.")
        else:
            total_seconds += dt
            if tree_arg:
                trees[file] = result
            report_status(succeeded=True, file=file, verbose=verbose, short=short)
        files.append(file)

    generate_time_stats(files, total_seconds)
    if short:
        print_memstats()

    if errors:
        print(f"Encountered {errors} failures.", file=sys.stderr)

    # Compare trees (the dict is empty unless -t is given)
    compare_trees_errors = 0
    for file, tree in trees.items():
        if not short:
            print("Comparing ASTs for", file)
        if compare_trees(tree, file, verbose, tree_arg >= 2) == 1:
            compare_trees_errors += 1

    if errors or compare_trees_errors:
        return 1

    return 0


def main() -> None:
    """Parse the command line, run parse_directory() and exit with its status."""
    args = argparser.parse_args()
    if args.directory is None:
        # Without a directory glob.escape() would fail with a TypeError;
        # report a normal usage error instead.
        argparser.error("the -d/--directory argument is required")

    # Tree comparison needs AST objects; otherwise exercise the compiler too.
    mode = PARSE if args.tree else COMPILE
    sys.exit(
        parse_directory(
            args.directory,
            args.verbose,
            args.exclude,
            args.tree,
            args.short,
            mode,
            oldparser=False,
        )
    )


if __name__ == "__main__":
    main()