#!/usr/bin/env python3.8
"""Extract the package archives found in ./data/pypi and try to parse them.

Each archive is unpacked next to itself, the resulting directory is handed to
``test_parse_directory.parse_directory`` and, when that reports status 0, the
extracted directory is removed again.
"""

import argparse
import os
import glob
import tarfile
import zipfile
import shutil
import pathlib
import sys

from typing import Generator, Any

# Put the current working directory first on the import path so the
# project-local imports below resolve relative to where the script is run.
sys.path.insert(0, ".")

from pegen import build
from scripts import test_parse_directory

HERE = pathlib.Path(__file__).resolve().parent

argparser = argparse.ArgumentParser(
    prog="test_pypi_packages", description="Helper program to test parsing PyPI packages",
)
argparser.add_argument(
    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)


def get_packages() -> Generator[str, None, None]:
    """Yield the path of every ``.tar.gz``, ``.zip`` and ``.tgz`` in ./data/pypi."""
    all_packages = (
        glob.glob("./data/pypi/*.tar.gz")
        + glob.glob("./data/pypi/*.zip")
        + glob.glob("./data/pypi/*.tgz")
    )
    yield from all_packages


def extract_files(filename: str) -> None:
    """Unpack the archive *filename* into ``data/pypi``.

    Raises:
        ValueError: if *filename* is neither a tar archive nor a zip archive.
    """
    savedir = os.path.join("data", "pypi")
    # NOTE(review): extractall() trusts the member paths stored in the archive.
    # These archives are presumably downloaded from PyPI, so only run this on
    # archives you trust (or in a sandbox) - confirm against how data/pypi is
    # populated.
    if tarfile.is_tarfile(filename):
        # Context managers make sure the archive handle is closed even when
        # extraction fails part-way through.
        with tarfile.open(filename) as archive:
            archive.extractall(savedir)
    elif zipfile.is_zipfile(filename):
        with zipfile.ZipFile(filename) as archive:
            archive.extractall(savedir)
    else:
        raise ValueError(f"Could not identify type of compressed file {filename}")


def find_dirname(package_name: str) -> str:
    """Return the path of the first directory in ``data/pypi`` matching *package_name*.

    A directory matches when its name is a substring of *package_name*; entries
    are examined in the order ``os.listdir`` returns them.

    Raises:
        AssertionError: if no directory matches. This is not expected to happen
            right after a successful extraction.
    """
    for name in os.listdir(os.path.join("data", "pypi")):
        full_path = os.path.join("data", "pypi", name)
        if os.path.isdir(full_path) and name in package_name:
            return full_path
    # Raised explicitly instead of ``assert False`` so the failure still
    # surfaces under ``python -O`` rather than silently returning None.
    raise AssertionError(f"No extracted directory found for {package_name}")


def run_tests(dirname: str, tree: int) -> int:
    """Parse every file under *dirname* with the pegen parser.

    *tree* is forwarded as ``tree_arg``; any non-zero value also selects
    ``mode=1`` instead of ``mode=0``. The value returned by
    ``test_parse_directory.parse_directory`` is passed through unchanged.
    """
    return test_parse_directory.parse_directory(
        dirname,
        verbose=False,
        excluded_files=[],
        tree_arg=tree,
        short=True,
        mode=1 if tree else 0,
        parser="pegen",
    )


def main() -> None:
    """Extract each package archive, parse it and clean up after a success."""
    args = argparser.parse_args()
    tree = args.tree

    for package in get_packages():
        # Flush so the progress message is visible while extraction runs,
        # not only once the trailing "Done" completes the line.
        print(f"Extracting files from {package}... ", end="", flush=True)
        try:
            extract_files(package)
            print("Done")
        except ValueError as e:
            # Unknown archive type: report it and move on to the next package.
            print(e)
            continue

        print("Trying to parse all python files ... ")
        dirname = find_dirname(package)
        status = run_tests(dirname, tree)
        if status == 0:
            shutil.rmtree(dirname)
        else:
            # The extracted tree is left on disk when parsing fails.
            print(f"Failed to parse {dirname}")


if __name__ == "__main__":
    main()