• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3.8
2
3import argparse
4import os
5import glob
6import tarfile
7import zipfile
8import shutil
9import pathlib
10import sys
11
12from typing import Generator, Any
13
14sys.path.insert(0, ".")
15
16from pegen import build
17from scripts import test_parse_directory
18
# Directory that contains this script, with symlinks resolved.
HERE = pathlib.Path(__file__).resolve().parent

# Command-line interface. ``-t``/``--tree`` is a counting flag: every
# repetition on the command line increases ``args.tree`` by one (default 0).
argparser = argparse.ArgumentParser(
    description="Helper program to test parsing PyPI packages",
    prog="test_pypi_packages",
)
argparser.add_argument(
    "-t",
    "--tree",
    default=0,
    action="count",
    help="Compare parse tree to official AST",
)
28
29
def get_packages() -> Generator[str, None, None]:
    """Yield the path of every package archive found in ``./data/pypi``.

    Archives are recognised by file name only. All ``*.tar.gz`` matches are
    produced first, then ``*.zip``, then ``*.tgz``; the order inside each
    group is whatever ``glob.glob`` returns. Paths are relative to the
    current working directory.
    """
    patterns = (
        "./data/pypi/*.tar.gz",
        "./data/pypi/*.zip",
        "./data/pypi/*.tgz",
    )
    archives = []
    for pattern in patterns:
        archives.extend(glob.glob(pattern))
    # The full list is built before the first path is handed out, so files
    # that show up while the caller is still iterating are not picked up.
    yield from archives
38
39
def extract_files(filename: str) -> None:
    """Unpack the archive *filename* into ``data/pypi``.

    The archive type is detected from the file's content (not its name):
    tar archives are tried first, then zip archives. The destination is
    relative to the current working directory.

    Args:
        filename: Path of the tar or zip archive to unpack.

    Raises:
        ValueError: If *filename* is neither a tar nor a zip archive.
    """
    savedir = os.path.join("data", "pypi")
    # NOTE(review): ``extractall`` trusts the member names stored in the
    # archive, so a malicious package could write outside ``savedir``.
    # Consider ``filter="data"`` for tar archives once the minimum supported
    # Python version provides it.
    if tarfile.is_tarfile(filename):
        # Context managers close the archive handle even if extraction fails;
        # the archive objects were previously left open.
        with tarfile.open(filename) as tar_archive:
            tar_archive.extractall(savedir)
    elif zipfile.is_zipfile(filename):
        with zipfile.ZipFile(filename) as zip_archive:
            zip_archive.extractall(savedir)
    else:
        raise ValueError(f"Could not identify type of compressed file {filename}")
48
49
def find_dirname(package_name: str) -> str:
    """Return the directory under ``data/pypi`` that belongs to *package_name*.

    A directory qualifies when its name is a substring of the archive's file
    name (the last path component of *package_name*). When several
    directories qualify, the longest name wins, so ``foo-1.0`` is preferred
    over a stale ``foo`` for the archive ``foo-1.0.tar.gz``; equally long
    names are ordered alphabetically to keep the result deterministic.

    Args:
        package_name: Path (or bare file name) of the package archive.

    Returns:
        The matching directory as ``data/pypi/<name>``, relative to the
        current working directory.

    Raises:
        AssertionError: If no directory matches.
    """
    base_dir = os.path.join("data", "pypi")
    # Only the file name is matched: the leading "./data/pypi/" part of the
    # path would otherwise make directories called "data" or "pypi" match
    # every package.
    archive_name = os.path.basename(package_name)
    candidates = [
        name
        for name in os.listdir(base_dir)
        if name in archive_name and os.path.isdir(os.path.join(base_dir, name))
    ]
    if not candidates:
        # Raised explicitly (rather than ``assert False``) so the failure is
        # still reported when Python runs with -O.
        raise AssertionError(
            f"No extracted directory found in {base_dir} for {package_name}"
        )
    best_match = max(candidates, key=lambda name: (len(name), name))
    return os.path.join(base_dir, best_match)
56
57
def run_tests(dirname: str, tree: int) -> int:
    """Run ``test_parse_directory.parse_directory`` on *dirname*.

    The call is made with ``parser="pegen"``, no excluded files, and the
    ``short`` flag set, with ``verbose`` turned off.

    Args:
        dirname: Directory handed to ``parse_directory``.
        tree: Forwarded unchanged as ``tree_arg``; any non-zero value also
            selects ``mode=1`` instead of ``mode=0``.

    Returns:
        Whatever ``parse_directory`` returns, unchanged.
    """
    if tree:
        mode = 1
    else:
        mode = 0
    return test_parse_directory.parse_directory(
        dirname,
        verbose=False,
        excluded_files=[],
        tree_arg=tree,
        short=True,
        mode=mode,
        parser="pegen",
    )
68
69
def main() -> None:
    """Extract and parse every package archive found by ``get_packages``.

    For each archive: unpack it, locate the extracted directory, and run the
    parser tests on it. The extracted directory is deleted when the tests
    return status 0 and kept otherwise, with a failure message printed.
    Archives whose type cannot be identified are reported and skipped.
    """
    args = argparser.parse_args()
    tree = args.tree

    for package in get_packages():
        # Flush explicitly: the line has no trailing newline, so without it
        # the message can stay buffered until extraction has finished.
        print(f"Extracting files from {package}... ", end="", flush=True)
        try:
            extract_files(package)
        except ValueError as e:
            print(e)
            continue
        print("Done")

        print("Trying to parse all python files ... ")
        dirname = find_dirname(package)
        status = run_tests(dirname, tree)
        if status == 0:
            # Clean up only on success so failing packages can be inspected.
            shutil.rmtree(dirname)
        else:
            print(f"Failed to parse {dirname}")


if __name__ == "__main__":
    main()
94