• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From fd6e7a61a16a17fa155cbd717de0c79001af71e6 Mon Sep 17 00:00:00 2001
2From: Artem Belevich <tra@google.com>
3Date: Mon, 23 Sep 2019 11:18:56 -0700
4Subject: [PATCH] Fix CUDA version detection in CUB
5
6This fixes CUB using deprecated shfl/vote instructions when it is compiled with
7clang (e.g. some TensorFlow builds) by also testing CUDA_VERSION in the version checks.
8---
9 cub/util_arch.cuh | 3 ++-
10 cub/util_type.cuh | 4 ++--
11 2 files changed, 4 insertions(+), 3 deletions(-)
12
13diff --git a/cub/util_arch.cuh b/cub/util_arch.cuh
14index 87c5ea2fb..9ad9d1cbb 100644
15--- a/cub/util_arch.cuh
16+++ b/cub/util_arch.cuh
17@@ -44,7 +44,8 @@ namespace cub {
18
19 #ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document
20
21-#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS)
22+#if !defined(CUB_USE_COOPERATIVE_GROUPS) && \
23+    (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
24     #define CUB_USE_COOPERATIVE_GROUPS
25 #endif
26
27diff --git a/cub/util_type.cuh b/cub/util_type.cuh
28index 0ba41e1ed..b2433d735 100644
29--- a/cub/util_type.cuh
30+++ b/cub/util_type.cuh
31@@ -37,7 +37,7 @@
32 #include <limits>
33 #include <cfloat>
34
35-#if (__CUDACC_VER_MAJOR__ >= 9)
36+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
37     #include <cuda_fp16.h>
38 #endif
39
40@@ -1063,7 +1063,7 @@ struct FpLimits<double>
41 };
42
43
44-#if (__CUDACC_VER_MAJOR__ >= 9)
45+#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
46 template <>
47 struct FpLimits<__half>
48 {
49