From fd6e7a61a16a17fa155cbd717de0c79001af71e6 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Mon, 23 Sep 2019 11:18:56 -0700
Subject: [PATCH] Fix CUDA version detection in CUB

This fixes the problem with CUB using deprecated shfl/vote instructions when CUB
is compiled with clang (e.g. some TensorFlow builds).
---
 cub/util_arch.cuh | 3 ++-
 cub/util_type.cuh | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/cub/util_arch.cuh b/cub/util_arch.cuh
index 87c5ea2fb..9ad9d1cbb 100644
--- a/cub/util_arch.cuh
+++ b/cub/util_arch.cuh
@@ -44,7 +44,8 @@ namespace cub {
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document
 
-#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS)
+#if !defined(CUB_USE_COOPERATIVE_GROUPS) && \
+    (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
 #define CUB_USE_COOPERATIVE_GROUPS
 #endif
 
diff --git a/cub/util_type.cuh b/cub/util_type.cuh
index 0ba41e1ed..b2433d735 100644
--- a/cub/util_type.cuh
+++ b/cub/util_type.cuh
@@ -37,7 +37,7 @@
 #include <limits>
 #include <cfloat>
 
-#if (__CUDACC_VER_MAJOR__ >= 9)
+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
 #include <cuda_fp16.h>
 #endif
 
@@ -1063,7 +1063,7 @@ struct FpLimits<double>
 };
 
 
-#if (__CUDACC_VER_MAJOR__ >= 9)
+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
 template <>
 struct FpLimits<__half>
 {