# Synopsis:
#   CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures])
#   -- Selects GPU arch flags for nvcc based on target_CUDA_architectures
#      target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...)
#       - "Auto" detects local machine GPU compute arch at runtime.
#       - "Common" and "All" cover common and entire subsets of architectures
#      ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
#      NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal Volta Turing Ampere
#      NUM: Any number. Only those pairs are currently accepted by NVCC though:
#            2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5 8.0 8.6
#      Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable}
#      Additionally, sets ${out_variable}_readable to a space-separated string
#      of the selected targets (sm_XX for binaries, compute_XX for PTX)
#      Example (quote entries containing parentheses so that each one is
#      passed as a single argument):
#       CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX "5.2(5.0)" Maxwell)
#        LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
#
#      More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA
#

# When CUDA is enabled as a first-class language, derive CUDA_VERSION
# (major.minor only) from the NVIDIA compiler version.
if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
  if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
      AND CMAKE_CUDA_COMPILER_VERSION MATCHES "^([0-9]+\\.[0-9]+)")
    set(CUDA_VERSION "${CMAKE_MATCH_1}")
  endif()
endif()

# See: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list
# Additions, deprecations, and removals can be found in the release notes:
# https://developer.nvidia.com/cuda-toolkit-archive

# The tables below are built incrementally: each toolkit release appends the
# architectures it introduced and removes the ones it dropped.
#   CUDA_KNOWN_GPU_ARCHITECTURES   - architecture names accepted as NAME
#   CUDA_COMMON_GPU_ARCHITECTURES  - numeric list used for "Common" and as the
#                                    fallback when auto-detection fails
#   CUDA_ALL_GPU_ARCHITECTURES     - every numeric architecture of the toolkit
#   CUDA_LIMIT_GPU_ARCHITECTURE    - first architecture the toolkit does NOT
#                                    support (exclusive upper bound)
#   _CUDA_MAX_COMMON_ARCHITECTURE  - newest entry, appended with +PTX at the end

# The initial status here is for CUDA 7.0
set(CUDA_KNOWN_GPU_ARCHITECTURES  "Fermi" "Kepler" "Maxwell" "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra")
set(CUDA_COMMON_GPU_ARCHITECTURES "2.0" "2.1" "3.0" "3.5" "5.0" "5.3")
set(CUDA_LIMIT_GPU_ARCHITECTURE "6.0")
set(CUDA_ALL_GPU_ARCHITECTURES "2.0" "2.1" "3.0" "3.2" "3.5" "3.7" "5.0" "5.2" "5.3")
set(_CUDA_MAX_COMMON_ARCHITECTURE "5.2+PTX")


if(CUDA_VERSION VERSION_GREATER_EQUAL "8.0")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "6.0" "6.1" "6.2")

  set(_CUDA_MAX_COMMON_ARCHITECTURE "6.2+PTX")
  set(CUDA_LIMIT_GPU_ARCHITECTURE "7.0")

  list(REMOVE_ITEM CUDA_COMMON_GPU_ARCHITECTURES "2.0" "2.1")
endif()

if(CUDA_VERSION VERSION_GREATER_EQUAL "9.0")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Volta")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "7.0" "7.2")

  set(_CUDA_MAX_COMMON_ARCHITECTURE "7.2+PTX")
  set(CUDA_LIMIT_GPU_ARCHITECTURE "8.0")

  list(REMOVE_ITEM CUDA_KNOWN_GPU_ARCHITECTURES "Fermi")
  list(REMOVE_ITEM CUDA_ALL_GPU_ARCHITECTURES "2.0" "2.1")
endif()

if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Turing")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.5")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "7.5")

  set(_CUDA_MAX_COMMON_ARCHITECTURE "7.5+PTX")
  set(CUDA_LIMIT_GPU_ARCHITECTURE "8.0")

  list(REMOVE_ITEM CUDA_COMMON_GPU_ARCHITECTURES "3.0")
endif()

# https://docs.nvidia.com/cuda/archive/11.0/cuda-toolkit-release-notes/index.html#cuda-general-new-features
# https://docs.nvidia.com/cuda/archive/11.0/cuda-toolkit-release-notes/index.html#deprecated-features
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Ampere")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.0")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.0")

  set(_CUDA_MAX_COMMON_ARCHITECTURE "8.0+PTX")
  set(CUDA_LIMIT_GPU_ARCHITECTURE "8.6")

  list(REMOVE_ITEM CUDA_COMMON_GPU_ARCHITECTURES "3.5" "5.0")
  list(REMOVE_ITEM CUDA_ALL_GPU_ARCHITECTURES "3.0" "3.2")
endif()

if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "8.6")
  list(APPEND CUDA_ALL_GPU_ARCHITECTURES "8.6")

  set(_CUDA_MAX_COMMON_ARCHITECTURE "8.6+PTX")
  set(CUDA_LIMIT_GPU_ARCHITECTURE "9.0")
endif()

# The newest architecture is always last in the common list, with PTX enabled.
# CUDA_DETECT_INSTALLED_GPUS relies on this when it reads the last element.
list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "${_CUDA_MAX_COMMON_ARCHITECTURE}")

# Check with: cmake -DCUDA_VERSION=7.0 -P select_compute_arch.cmake
if(DEFINED CMAKE_SCRIPT_MODE_FILE)
  include(CMakePrintHelpers)
  cmake_print_variables(CUDA_KNOWN_GPU_ARCHITECTURES)
  cmake_print_variables(CUDA_COMMON_GPU_ARCHITECTURES)
  cmake_print_variables(CUDA_LIMIT_GPU_ARCHITECTURE)
  cmake_print_variables(CUDA_ALL_GPU_ARCHITECTURES)
endif()


################################################################################################
# A function for automatic detection of GPUs installed  (if autodetection is enabled)
# Usage:
#   CUDA_DETECT_INSTALLED_GPUS(OUT_VARIABLE)
#
# Compiles and runs a small program that prints "major.minor" for every CUDA
# device it can query. A successful result is stored in the internal cache
# variable CUDA_GPU_DETECT_OUTPUT, so the probe is not repeated while that
# variable holds a value.
#
# On success OUT_VARIABLE receives a space-separated string of architectures;
# any detected architecture at or above CUDA_LIMIT_GPU_ARCHITECTURE is replaced
# by the last entry of CUDA_COMMON_GPU_ARCHITECTURES. On failure OUT_VARIABLE
# receives CUDA_COMMON_GPU_ARCHITECTURES.
#
function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE)
  if(NOT CUDA_GPU_DETECT_OUTPUT)
    if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
      set(file "${PROJECT_BINARY_DIR}/detect_cuda_compute_capabilities.cu")
    else()
      set(file "${PROJECT_BINARY_DIR}/detect_cuda_compute_capabilities.cpp")
    endif()

    file(WRITE "${file}" ""
      "#include <cuda_runtime.h>\n"
      "#include <cstdio>\n"
      "int main()\n"
      "{\n"
      "  int count = 0;\n"
      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      "  if (count == 0) return -1;\n"
      "  for (int device = 0; device < count; ++device)\n"
      "  {\n"
      "    cudaDeviceProp prop;\n"
      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      "      std::printf(\"%d.%d \", prop.major, prop.minor);\n"
      "  }\n"
      "  return 0;\n"
      "}\n")

    if(CMAKE_CUDA_COMPILER_LOADED) # CUDA as a language
      try_run(run_result compile_result "${PROJECT_BINARY_DIR}" "${file}"
              RUN_OUTPUT_VARIABLE compute_capabilities)
    else()
      # Built as plain C++, so the CUDA headers and libraries are passed explicitly.
      try_run(run_result compile_result "${PROJECT_BINARY_DIR}" "${file}"
              CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
              LINK_LIBRARIES ${CUDA_LIBRARIES}
              RUN_OUTPUT_VARIABLE compute_capabilities)
    endif()

    # Filter unrelated content out of the output.
    string(REGEX MATCHALL "[0-9]+\\.[0-9]+" compute_capabilities "${compute_capabilities}")

    if(run_result EQUAL 0)
      # Map 2.1 to "2.1(2.0)" (sm_21 code built from compute_20). The match is
      # done per item so that other values containing "2.1" (e.g. 12.1) are
      # left untouched.
      set(normalized_capabilities "")
      foreach(capability IN LISTS compute_capabilities)
        if(capability STREQUAL "2.1")
          list(APPEND normalized_capabilities "2.1(2.0)")
        else()
          list(APPEND normalized_capabilities "${capability}")
        endif()
      endforeach()
      set(CUDA_GPU_DETECT_OUTPUT "${normalized_capabilities}"
        CACHE INTERNAL "Returned GPU architectures from detect_gpus tool" FORCE)
    endif()
  endif()

  if(NOT CUDA_GPU_DETECT_OUTPUT)
    message(STATUS "Automatic GPU detection failed. Building for common architectures.")
    set(${OUT_VARIABLE} ${CUDA_COMMON_GPU_ARCHITECTURES} PARENT_SCOPE)
  else()
    # Filter based on CUDA version supported archs
    set(CUDA_GPU_DETECT_OUTPUT_FILTERED "")
    separate_arguments(CUDA_GPU_DETECT_OUTPUT)
    foreach(ITEM IN ITEMS ${CUDA_GPU_DETECT_OUTPUT})
      if(CUDA_LIMIT_GPU_ARCHITECTURE AND ITEM VERSION_GREATER_EQUAL CUDA_LIMIT_GPU_ARCHITECTURE)
        # The GPU is newer than this toolkit supports: substitute the last
        # entry of the common list.
        list(GET CUDA_COMMON_GPU_ARCHITECTURES -1 NEWITEM)
        string(APPEND CUDA_GPU_DETECT_OUTPUT_FILTERED " ${NEWITEM}")
      else()
        string(APPEND CUDA_GPU_DETECT_OUTPUT_FILTERED " ${ITEM}")
      endif()
    endforeach()
    # Every item was appended with a leading separator; drop the first one.
    string(STRIP "${CUDA_GPU_DETECT_OUTPUT_FILTERED}" CUDA_GPU_DETECT_OUTPUT_FILTERED)

    set(${OUT_VARIABLE} "${CUDA_GPU_DETECT_OUTPUT_FILTERED}" PARENT_SCOPE)
  endif()
endfunction()


################################################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list
# Usage:
#   CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs])
#
# Arguments:
#   out_variable - name of the variable that receives the list of -gencode
#                  flags; ${out_variable}_readable receives a space-separated
#                  string of the selected sm_XX / compute_XX targets.
#   ARGN         - "Auto" (default when empty), "Common", "All", or a list of
#                  ARCH_AND_PTX entries as described at the top of this file.
#                  "All" expands to CUDA_KNOWN_GPU_ARCHITECTURES and "Common"
#                  to CUDA_COMMON_GPU_ARCHITECTURES.
#
# Unknown architecture names are reported with message(SEND_ERROR).
function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable)
  set(CUDA_ARCH_LIST "${ARGN}")

  if("X${CUDA_ARCH_LIST}" STREQUAL "X" )
    set(CUDA_ARCH_LIST "Auto")
  endif()

  set(cuda_arch_bin)
  set(cuda_arch_ptx)

  if("${CUDA_ARCH_LIST}" STREQUAL "All")
    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
    CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST)
    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
  endif()

  # Now process the list and look for names
  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
  list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
  foreach(arch_name ${CUDA_ARCH_LIST})
    set(arch_bin)
    set(arch_ptx)
    set(add_ptx FALSE)
    # Check to see if we are compiling PTX
    if(arch_name MATCHES "(.*)\\+PTX$")
      set(add_ptx TRUE)
      # Quoted so that a bare "+PTX" entry leaves arch_name defined (empty)
      # and is reported as an unknown name below.
      set(arch_name "${CMAKE_MATCH_1}")
    endif()
    # Numeric form: MAJOR.MINOR, optionally followed by (MAJOR.MINOR) naming
    # the virtual architecture to compile from. MAJOR may have several digits.
    if(arch_name MATCHES "^([0-9]+\\.[0-9](\\([0-9]+\\.[0-9]\\))?)$")
      set(arch_bin ${CMAKE_MATCH_1})
      set(arch_ptx ${arch_bin})
    else()
      # Look for it in our list of known architectures.
      # arch_name is passed by name (not as ${arch_name}) so that its value is
      # compared as a string and never dereferenced a second time.
      if(arch_name STREQUAL "Fermi")
        set(arch_bin 2.0 "2.1(2.0)")
      elseif(arch_name STREQUAL "Kepler+Tegra")
        set(arch_bin 3.2)
      elseif(arch_name STREQUAL "Kepler+Tesla")
        set(arch_bin 3.7)
      elseif(arch_name STREQUAL "Kepler")
        set(arch_bin 3.0 3.5)
        set(arch_ptx 3.5)
      elseif(arch_name STREQUAL "Maxwell+Tegra")
        set(arch_bin 5.3)
      elseif(arch_name STREQUAL "Maxwell")
        set(arch_bin 5.0 5.2)
        set(arch_ptx 5.2)
      elseif(arch_name STREQUAL "Pascal")
        set(arch_bin 6.0 6.1)
        set(arch_ptx 6.1)
      elseif(arch_name STREQUAL "Volta")
        set(arch_bin 7.0)
        set(arch_ptx 7.0)
      elseif(arch_name STREQUAL "Turing")
        set(arch_bin 7.5)
        set(arch_ptx 7.5)
      elseif(arch_name STREQUAL "Ampere")
        set(arch_bin 8.0)
        set(arch_ptx 8.0)
      else()
        message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS")
      endif()
    endif()
    if(NOT arch_bin)
      message(SEND_ERROR "arch_bin wasn't set for some reason")
    endif()
    list(APPEND cuda_arch_bin ${arch_bin})
    if(add_ptx)
      # Names without a dedicated PTX architecture fall back to their binaries.
      if (NOT arch_ptx)
        set(arch_ptx ${arch_bin})
      endif()
      list(APPEND cuda_arch_ptx ${arch_ptx})
    endif()
  endforeach()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
  # Only digits are kept for PTX, so "52(50)" contributes 52 and 50 separately.
  string(REGEX MATCHALL "[0-9]+"   cuda_arch_ptx "${cuda_arch_ptx}")

  if(cuda_arch_bin)
    list(REMOVE_DUPLICATES cuda_arch_bin)
  endif()
  if(cuda_arch_ptx)
    list(REMOVE_DUPLICATES cuda_arch_ptx)
  endif()

  set(nvcc_flags "")
  set(nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified ARCH for the concrete CODE
      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
    else()
      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
      list(APPEND nvcc_archs_readable sm_${arch})
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
    list(APPEND nvcc_archs_readable compute_${arch})
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  set(${out_variable}          ${nvcc_flags}          PARENT_SCOPE)
  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
endfunction()