• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
17     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
18 
19 #define EIGEN_USE_GPU
20 
21 #include "tensorflow/core/framework/bfloat16.h"
22 #define SPECIALIZE_FOR_GPUS
23 #include "tensorflow/core/kernels/cast_op.h"
24 #undef SPECIALIZE_FOR_GPUS
25 
26 namespace tensorflow {
27 namespace functor {
28 
// Short local alias for the Eigen GPU device type used by every instantiation
// in this file.
29 using GPUDevice = Eigen::GpuDevice;
30 
// Expands the cast-functor macro for GPUDevice: the SUBSET variant when
// MLIR-generated GPU kernels are enabled, the full variant otherwise.
// NOTE(review): both macros are presumably supplied by cast_op.h (included
// above with SPECIALIZE_FOR_GPUS defined) -- confirm what each one emits.
31 #if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
32 CAST_FUNCTORS_SUBSET(GPUDevice);
33 #else
34 CAST_FUNCTORS(GPUDevice);
35 #endif
36 
// Explicitly instantiates CastFunctor<GPUDevice, O, I> in this translation
// unit. The expansion has no trailing semicolon; the call site supplies it.
// NOTE(review): O and I are presumably the output and input element types, in
// that order -- confirm against the CastFunctor declaration.
37 #define DEFINE(O, I) template struct CastFunctor<GPUDevice, O, I>
38 
// Instantiates CastFunctor<GPUDevice, in_type, T> for each of the fourteen
// element types T listed below (bool, the unsigned and signed integer types,
// Eigen::half, float, double and both complex types).
// The last entry has no trailing semicolon; the invocation supplies it.
// NOTE(review): the argument is forwarded as DEFINE's *first* parameter (O),
// so the `in_type` / `FROM` naming looks inverted relative to DEFINE(O, I) --
// confirm the intended direction before relying on the name.
39 #define DEFINE_ALL_FROM(in_type)        \
40   DEFINE(in_type, bool);                \
41   DEFINE(in_type, uint8);               \
42   DEFINE(in_type, uint16);              \
43   DEFINE(in_type, uint32);              \
44   DEFINE(in_type, uint64);              \
45   DEFINE(in_type, int8);                \
46   DEFINE(in_type, int16);               \
47   DEFINE(in_type, int32);               \
48   DEFINE(in_type, int64);               \
49   DEFINE(in_type, Eigen::half);         \
50   DEFINE(in_type, float);               \
51   DEFINE(in_type, double);              \
52   DEFINE(in_type, std::complex<float>); \
53   DEFINE(in_type, std::complex<double>)
54 
// Instantiates CastFunctor<GPUDevice, float, bfloat16>; emitted regardless of
// MLIR_GENERATED_GPU_KERNELS_ENABLED.
55 DEFINE(float, bfloat16);
56 
// Instantiations whose first DEFINE argument is bool, an integer type, double
// or std::complex<double>. With MLIR-generated GPU kernels enabled, bool, the
// signed integer types and double only get the reduced DEFINE_SUBSET_FROM
// list; otherwise every type gets the full DEFINE_ALL_FROM list.
57 #if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
58 
59 // The subset of types which are currently not supported yet with the MLIR
60 // generated kernels.
// Covers the unsigned integer types and both complex types as DEFINE's second
// argument. The last entry has no trailing semicolon; the invocation supplies
// it.
61 #define DEFINE_SUBSET_FROM(in_type)     \
62   DEFINE(in_type, uint8);               \
63   DEFINE(in_type, uint16);              \
64   DEFINE(in_type, uint32);              \
65   DEFINE(in_type, uint64);              \
66   DEFINE(in_type, std::complex<float>); \
67   DEFINE(in_type, std::complex<double>)
68 
// Unsigned integer types and std::complex<double> still receive the full list
// in this branch; the remaining types receive only the subset.
69 DEFINE_SUBSET_FROM(bool);
70 DEFINE_ALL_FROM(uint8);
71 DEFINE_ALL_FROM(uint16);
72 DEFINE_ALL_FROM(uint32);
73 DEFINE_ALL_FROM(uint64);
74 DEFINE_SUBSET_FROM(int8);
75 DEFINE_SUBSET_FROM(int16);
76 DEFINE_SUBSET_FROM(int32);
77 DEFINE_SUBSET_FROM(int64);
78 DEFINE_SUBSET_FROM(double);
79 // The cast from float to double is still needed for resize_bilinear_op.cc
80 DEFINE(double, float);
81 DEFINE_ALL_FROM(std::complex<double>);
82 
// DEFINE_SUBSET_FROM is only used inside this branch, so it is removed here.
83 #undef DEFINE_SUBSET_FROM
84 
85 #else
86 
// MLIR-generated kernels disabled: instantiate the full list for every type.
87 DEFINE_ALL_FROM(bool);
88 DEFINE_ALL_FROM(uint8);
89 DEFINE_ALL_FROM(uint16);
90 DEFINE_ALL_FROM(uint32);
91 DEFINE_ALL_FROM(uint64);
92 DEFINE_ALL_FROM(int8);
93 DEFINE_ALL_FROM(int16);
94 DEFINE_ALL_FROM(int32);
95 DEFINE_ALL_FROM(int64);
96 DEFINE_ALL_FROM(double);
97 DEFINE_ALL_FROM(std::complex<double>);
98 #endif
99 
// Instantiates CastFunctor<GPUDevice, out_type, T> for the twelve element
// types T listed below: the DEFINE_ALL_FROM list without double and
// std::complex<double>.
// The last entry has no trailing semicolon; the invocation supplies it.
100 #define DEFINE_ALL_TO_FLOAT(out_type) \
101   DEFINE(out_type, bool);             \
102   DEFINE(out_type, uint8);            \
103   DEFINE(out_type, uint16);           \
104   DEFINE(out_type, uint32);           \
105   DEFINE(out_type, uint64);           \
106   DEFINE(out_type, int8);             \
107   DEFINE(out_type, int16);            \
108   DEFINE(out_type, int32);            \
109   DEFINE(out_type, int64);            \
110   DEFINE(out_type, Eigen::half);      \
111   DEFINE(out_type, float);            \
112   DEFINE(out_type, std::complex<float>)
113 
// Instantiates CastFunctor<GPUDevice, out_type, T> for the ten element types T
// listed below: bool, the four unsigned and four signed integer types, and
// Eigen::half.
// The last entry has no trailing semicolon; the invocation supplies it.
114 #define DEFINE_ALL_TO_HALF(out_type) \
115   DEFINE(out_type, bool);            \
116   DEFINE(out_type, uint8);           \
117   DEFINE(out_type, uint16);          \
118   DEFINE(out_type, uint32);          \
119   DEFINE(out_type, uint64);          \
120   DEFINE(out_type, int8);            \
121   DEFINE(out_type, int16);           \
122   DEFINE(out_type, int32);           \
123   DEFINE(out_type, int64);           \
124   DEFINE(out_type, Eigen::half)
125 
// bfloat16 gets the DEFINE_ALL_TO_HALF list and std::complex<float> gets the
// DEFINE_ALL_TO_FLOAT list; both are emitted regardless of
// MLIR_GENERATED_GPU_KERNELS_ENABLED.
126 DEFINE_ALL_TO_HALF(bfloat16);
127 DEFINE_ALL_TO_FLOAT(std::complex<float>);
128 
// Instantiations whose first DEFINE argument is Eigen::half or float. With
// MLIR-generated GPU kernels enabled only the reduced SUBSET lists below are
// instantiated; otherwise the full DEFINE_ALL_TO_HALF / DEFINE_ALL_TO_FLOAT
// lists are.
129 #if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
130 
131 // The subset of types which are currently not supported yet with the MLIR
132 // generated kernels. The cast from Eigen::half is still needed for
133 // depthwise_conv_grad_op.cc. The cast from float to float is still needed for
134 // resize_bilinear_op.cc.
// The last entry has no trailing semicolon; the invocation supplies it.
135 #define DEFINE_SUBSET_TO_FLOAT(out_type) \
136   DEFINE(out_type, uint8);               \
137   DEFINE(out_type, uint16);              \
138   DEFINE(out_type, uint32);              \
139   DEFINE(out_type, uint64);              \
140   DEFINE(out_type, Eigen::half);         \
141   DEFINE(out_type, float);               \
142   DEFINE(out_type, std::complex<float>)
143 
144 // The subset of types which are currently not supported yet with the MLIR
145 // generated kernels.
// Like every other list macro in this file, the last entry carries no
// semicolon: the invocation provides the terminator, so the expansion does not
// leave a stray empty declaration (`;;`) at namespace scope.
146 #define DEFINE_SUBSET_TO_HALF(out_type) \
147   DEFINE(out_type, uint8);              \
148   DEFINE(out_type, uint16);             \
149   DEFINE(out_type, uint32);             \
150   DEFINE(out_type, uint64)
151 
152 DEFINE_SUBSET_TO_HALF(Eigen::half);
153 DEFINE_SUBSET_TO_FLOAT(float);
154 
// The SUBSET macros are only used inside this branch, so they are removed here.
155 #undef DEFINE_SUBSET_TO_FLOAT
156 #undef DEFINE_SUBSET_TO_HALF
157 
158 #else
// MLIR-generated kernels disabled: instantiate the full lists.
159 DEFINE_ALL_TO_HALF(Eigen::half);
160 DEFINE_ALL_TO_FLOAT(float);
161 #endif
162 
// All instantiations have been emitted; remove the helper macros defined
// earlier in this file so they are not visible past this point.
163 #undef DEFINE_ALL_TO_FLOAT
164 #undef DEFINE_ALL_TO_HALF
165 #undef DEFINE_ALL_FROM
166 #undef DEFINE
167 
168 }  // end namespace functor
169 }  // end namespace tensorflow
170 
171 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
172