#!/usr/bin/env python3

"""Generates the binary build matrix used by GitHub Actions.

Outputs a condensed version of the matrix when run on a pull request: only the
latest Python version we support, built for three configurations:
    * CPU
    * Latest CUDA
    * Latest ROCm
"""

import os
from typing import Dict, List, Optional, Tuple

CUDA_ARCHES = ["11.8", "12.1", "12.4"]


CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1", "12.4": "12.4.0"}


CUDA_ARCHES_CUDNN_VERSION = {"11.8": "9", "12.1": "9", "12.4": "9"}


ROCM_ARCHES = ["6.0", "6.1"]


CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]


CPU_AARCH64_ARCH = ["cpu-aarch64"]


CPU_S390X_ARCH = ["cpu-s390x"]


CUDA_AARCH64_ARCH = ["cuda-aarch64"]


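# Extra pip dependencies pinned for each CUDA wheel build: a single string of
# PEP 508 requirement specifiers joined with " | ", propagated into the matrix
# entries below as "pytorch_extra_install_requirements".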
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
    "11.8": (
        "nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "  # noqa: B950
        "nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'"
    ),
    "12.1": (
        "nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "  # noqa: B950
        "nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'"
    ),
    "12.4": (
        "nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'"
    ),
}


def get_nccl_submodule_version() -> str:
    from pathlib import Path

    nccl_version_mk = (
        Path(__file__).absolute().parent.parent.parent
        / "third_party"
        / "nccl"
        / "nccl"
        / "makefiles"
        / "version.mk"
    )
    if not nccl_version_mk.exists():
        raise RuntimeError(
            "Please make sure that the nccl submodule is checked out when importing this script"
        )
    with nccl_version_mk.open("r") as f:
        content = f.read()
    # Collect the "NCCL_MAJOR := x" style assignments from version.mk
    d = {}
    for line in content.split("\n"):
        if not line.startswith("NCCL_"):
            continue
        (k, v) = line.split(":=")
        d[k.strip()] = v.strip()
    return f"{d['NCCL_MAJOR']}.{d['NCCL_MINOR']}.{d['NCCL_PATCH']}"


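# Extracts the pinned NCCL version from the wheel requirements above,
# e.g. get_nccl_wheel_version("12.1") returns "2.20.5" (from the
# nvidia-nccl-cu12 pin).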
def get_nccl_wheel_version(arch_version: str) -> str:
    import re

    requirements = map(
        str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version])
    )
    return next(x for x in requirements if x.startswith("nvidia-nccl-cu")).split("==")[
        1
    ]


def validate_nccl_dep_consistency(arch_version: str) -> None:
    wheel_ver = get_nccl_wheel_version(arch_version)
    submodule_ver = get_nccl_submodule_version()
    if wheel_ver != submodule_ver:
        raise RuntimeError(
            f"NCCL submodule version {submodule_ver} differs from wheel version {wheel_ver}"
        )


def arch_type(arch_version: str) -> str:
    if arch_version in CUDA_ARCHES:
        return "cuda"
    elif arch_version in ROCM_ARCHES:
        return "rocm"
    elif arch_version in CPU_CXX11_ABI_ARCH:
        return "cpu-cxx11-abi"
    elif arch_version in CPU_AARCH64_ARCH:
        return "cpu-aarch64"
    elif arch_version in CPU_S390X_ARCH:
        return "cpu-s390x"
    elif arch_version in CUDA_AARCH64_ARCH:
        return "cuda-aarch64"
    else:  # arch_version should always be "cpu" in this case
        return "cpu"


# This can be updated to the release version when cutting release branch, i.e. 2.1
DEFAULT_TAG = os.getenv("RELEASE_VERSION_TAG", "main")

WHEEL_CONTAINER_IMAGES = {
    **{
        gpu_arch: f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
        gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
    "cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
    "cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
    "cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
    "cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
    "cuda-aarch64": f"pytorch/manylinuxaarch64-builder:cuda12.4-{DEFAULT_TAG}",
}

CONDA_CONTAINER_IMAGES = {
    **{
        gpu_arch: f"pytorch/conda-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    "cpu": f"pytorch/conda-builder:cpu-{DEFAULT_TAG}",
}

PRE_CXX11_ABI = "pre-cxx11"
CXX11_ABI = "cxx11-abi"
RELEASE = "release"
DEBUG = "debug"

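# libtorch builder images, keyed by (gpu_arch_version, ABI) tuples: pre-cxx11
# builds use manylinux-builder images, cxx11-abi builds use
# libtorch-cxx11-builder images.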
LIBTORCH_CONTAINER_IMAGES: Dict[Tuple[str, str], str] = {
    **{
        (
            gpu_arch,
            PRE_CXX11_ABI,
        ): f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
        (
            gpu_arch,
            CXX11_ABI,
        ): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
        (
            gpu_arch,
            PRE_CXX11_ABI,
        ): f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
    **{
        (
            gpu_arch,
            CXX11_ABI,
        ): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
    ("cpu", PRE_CXX11_ABI): f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
    ("cpu", CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
}

FULL_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11", "3.12"]


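# Maps a (gpu_arch_type, gpu_arch_version) pair onto the short "desired cuda"
# string used in build names, e.g. translate_desired_cuda("cuda", "12.1")
# returns "cu121" and translate_desired_cuda("rocm", "6.1") returns "rocm6.1".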
def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
    return {
        "cpu": "cpu",
        "cpu-aarch64": "cpu",
        "cpu-cxx11-abi": "cpu-cxx11-abi",
        "cpu-s390x": "cpu",
        "cuda": f"cu{gpu_arch_version.replace('.', '')}",
        "cuda-aarch64": "cu124",
        "rocm": f"rocm{gpu_arch_version}",
    }.get(gpu_arch_type, gpu_arch_version)


def list_without(in_list: List[str], without: List[str]) -> List[str]:
    return [item for item in in_list if item not in without]


def generate_conda_matrix(os: str) -> List[Dict[str, str]]:
    ret: List[Dict[str, str]] = []
    arches = ["cpu"]
    python_versions = FULL_PYTHON_VERSIONS
    if os == "linux" or os == "windows":
        arches += CUDA_ARCHES
    for python_version in python_versions:
        # We don't currently build conda packages for rocm
        for arch_version in arches:
            gpu_arch_type = arch_type(arch_version)
            gpu_arch_version = "" if arch_version == "cpu" else arch_version
            ret.append(
                {
                    "python_version": python_version,
                    "gpu_arch_type": gpu_arch_type,
                    "gpu_arch_version": gpu_arch_version,
                    "desired_cuda": translate_desired_cuda(
                        gpu_arch_type, gpu_arch_version
                    ),
                    "container_image": CONDA_CONTAINER_IMAGES[arch_version],
                    "package_type": "conda",
                    "build_name": f"conda-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
                        ".", "_"
                    ),
                }
            )
    return ret


def generate_libtorch_matrix(
    os: str,
    abi_version: str,
    arches: Optional[List[str]] = None,
    libtorch_variants: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
    if arches is None:
        arches = ["cpu"]
        if os == "linux":
            arches += CUDA_ARCHES
            arches += ROCM_ARCHES
        elif os == "windows":
            arches += CUDA_ARCHES

    if libtorch_variants is None:
        libtorch_variants = [
            "shared-with-deps",
            "shared-without-deps",
            "static-with-deps",
            "static-without-deps",
        ]

    ret: List[Dict[str, str]] = []
    for arch_version in arches:
        for libtorch_variant in libtorch_variants:
            # one of the values in the following list must be exactly
            # CXX11_ABI, but the precise value of the other one doesn't
            # matter
            gpu_arch_type = arch_type(arch_version)
            gpu_arch_version = "" if arch_version == "cpu" else arch_version
            # ROCm builds without-deps failed even in ROCm runners; skip for now
            if gpu_arch_type == "rocm" and "without-deps" in libtorch_variant:
                continue
            ret.append(
                {
                    "gpu_arch_type": gpu_arch_type,
                    "gpu_arch_version": gpu_arch_version,
                    "desired_cuda": translate_desired_cuda(
                        gpu_arch_type, gpu_arch_version
                    ),
                    "libtorch_variant": libtorch_variant,
                    "libtorch_config": abi_version if os == "windows" else "",
                    "devtoolset": abi_version if os != "windows" else "",
                    "container_image": (
                        LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)]
                        if os != "windows"
                        else ""
                    ),
                    "package_type": "libtorch",
                    "build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace(
                        ".", "_"
                    ),
                }
            )
    return ret


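# Produces one wheel matrix entry per (python_version, arch_version) pair; an
# illustrative linux entry would carry build_name "manywheel-py3_12-cuda12_4"
# and container_image "pytorch/manylinux-builder:cuda12.4-main" (with the
# default "main" tag).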
def generate_wheels_matrix(
    os: str,
    arches: Optional[List[str]] = None,
    python_versions: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
    package_type = "wheel"
    if os == "linux" or os == "linux-aarch64" or os == "linux-s390x":
        # NOTE: We only build manywheel packages for x86_64, aarch64 and s390x linux
        package_type = "manywheel"

    if python_versions is None:
        python_versions = FULL_PYTHON_VERSIONS

    if arches is None:
        # Define default compute architectures
        arches = ["cpu"]
        if os == "linux":
            arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES
        elif os == "windows":
            arches += CUDA_ARCHES
        elif os == "linux-aarch64":
            # Only want the one arch as the CPU type is different and
            # uses different build/test scripts
            arches = ["cpu-aarch64", "cuda-aarch64"]
        elif os == "linux-s390x":
            # Only want the one arch as the CPU type is different and
            # uses different build/test scripts
            arches = ["cpu-s390x"]

    ret: List[Dict[str, str]] = []
    for python_version in python_versions:
        for arch_version in arches:
            gpu_arch_type = arch_type(arch_version)
            gpu_arch_version = (
                ""
                if arch_version == "cpu"
                or arch_version == "cpu-cxx11-abi"
                or arch_version == "cpu-aarch64"
                or arch_version == "cpu-s390x"
                or arch_version == "cuda-aarch64"
                else arch_version
            )

            # CUDA 11.8/12.1/12.4 linux wheels (and cuda-aarch64) require
            # PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
            if (
                arch_version in ["12.4", "12.1", "11.8"]
                and os == "linux"
                or arch_version == "cuda-aarch64"
            ):
                ret.append(
                    {
                        "python_version": python_version,
                        "gpu_arch_type": gpu_arch_type,
                        "gpu_arch_version": gpu_arch_version,
                        "desired_cuda": translate_desired_cuda(
                            gpu_arch_type, gpu_arch_version
                        ),
                        "devtoolset": (
                            "cxx11-abi" if arch_version == "cuda-aarch64" else ""
                        ),
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                        "package_type": package_type,
                        "pytorch_extra_install_requirements": (
                            PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]  # fmt: skip
                            if os != "linux-aarch64"
                            else ""
                        ),
                        "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(  # noqa: B950
                            ".", "_"
                        ),
                    }
                )
                # Special build for use on Colab: Python 3.10 with CUDA 12.1
                if (
                    arch_version != "cuda-aarch64"
                    and python_version == "3.10"
                    and arch_version == "12.1"
                ):
                    ret.append(
                        {
                            "python_version": python_version,
                            "gpu_arch_type": gpu_arch_type,
                            "gpu_arch_version": gpu_arch_version,
                            "desired_cuda": translate_desired_cuda(
                                gpu_arch_type, gpu_arch_version
                            ),
                            "devtoolset": "",
                            "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                            "package_type": package_type,
                            "pytorch_extra_install_requirements": "",
                            "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace(  # noqa: B950
                                ".", "_"
                            ),
                        }
                    )
            else:
                ret.append(
                    {
                        "python_version": python_version,
                        "gpu_arch_type": gpu_arch_type,
                        "gpu_arch_version": gpu_arch_version,
                        "desired_cuda": translate_desired_cuda(
                            gpu_arch_type, gpu_arch_version
                        ),
                        "devtoolset": (
                            "cxx11-abi" if arch_version == "cpu-cxx11-abi" else ""
                        ),
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                        "package_type": package_type,
                        "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
                            ".", "_"
                        ),
                        "pytorch_extra_install_requirements": (
                            PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"]  # fmt: skip
                            if os != "linux"
                            else ""
                        ),
                    }
                )
    return ret


validate_nccl_dep_consistency("12.4")
validate_nccl_dep_consistency("12.1")
validate_nccl_dep_consistency("11.8")
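# Illustrative usage sketch (hypothetical invocation; this module is normally
# imported by the workflow-generation tooling rather than run directly):
#
#   import json
#   matrix = generate_wheels_matrix("linux")
#   print(json.dumps({"include": matrix}, indent=2))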