/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Explicit instantiations of CastFunctor for the GPU device. The whole file is
// compiled only when building with CUDA or ROCm support.
#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)

#define EIGEN_USE_GPU

#include "tensorflow/core/framework/bfloat16.h"
// SPECIALIZE_FOR_GPUS is defined only while cast_op.h is being included.
#define SPECIALIZE_FOR_GPUS
#include "tensorflow/core/kernels/cast_op.h"
#undef SPECIALIZE_FOR_GPUS

namespace tensorflow {
namespace functor {

typedef Eigen::GpuDevice GPUDevice;

// CAST_FUNCTORS / CAST_FUNCTORS_SUBSET are not defined in this file
// (presumably they come from cast_op.h -- confirm). The SUBSET variant is used
// when MLIR-generated GPU kernels are enabled.
#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
CAST_FUNCTORS_SUBSET(GPUDevice);
#else
CAST_FUNCTORS(GPUDevice);
#endif

// Explicitly instantiates CastFunctor<GPUDevice, O, I>. Judging by the
// "float to double" note on DEFINE(double, float) below, O is the destination
// type and I is the source type.
// The macro deliberately has no trailing semicolon; the use site supplies it.
#define DEFINE(O, I) template struct CastFunctor<GPUDevice, O, I>

// Instantiates CastFunctor with `in_type` as the first type argument and each
// of the listed types as the second.
// NOTE(review): despite the name, `in_type` is passed in the O slot of
// DEFINE(O, I), which looks like the destination type -- confirm and consider
// renaming.
#define DEFINE_ALL_FROM(in_type)        \
  DEFINE(in_type, bool);                \
  DEFINE(in_type, uint8);               \
  DEFINE(in_type, uint16);              \
  DEFINE(in_type, uint32);              \
  DEFINE(in_type, uint64);              \
  DEFINE(in_type, int8);                \
  DEFINE(in_type, int16);               \
  DEFINE(in_type, int32);               \
  DEFINE(in_type, int64);               \
  DEFINE(in_type, Eigen::half);         \
  DEFINE(in_type, float);               \
  DEFINE(in_type, double);              \
  DEFINE(in_type, std::complex<float>); \
  DEFINE(in_type, std::complex<double>)

DEFINE(float, bfloat16);

#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)

// The subset of types which are not yet supported by the MLIR generated
// kernels.
#define DEFINE_SUBSET_FROM(in_type)     \
  DEFINE(in_type, uint8);               \
  DEFINE(in_type, uint16);              \
  DEFINE(in_type, uint32);              \
  DEFINE(in_type, uint64);              \
  DEFINE(in_type, std::complex<float>); \
  DEFINE(in_type, std::complex<double>)

DEFINE_SUBSET_FROM(bool);
DEFINE_ALL_FROM(uint8);
DEFINE_ALL_FROM(uint16);
DEFINE_ALL_FROM(uint32);
DEFINE_ALL_FROM(uint64);
DEFINE_SUBSET_FROM(int8);
DEFINE_SUBSET_FROM(int16);
DEFINE_SUBSET_FROM(int32);
DEFINE_SUBSET_FROM(int64);
DEFINE_SUBSET_FROM(double);
// The cast from float to double is still needed for resize_bilinear_op.cc
DEFINE(double, float);
DEFINE_ALL_FROM(std::complex<double>);

#undef DEFINE_SUBSET_FROM

#else

DEFINE_ALL_FROM(bool);
DEFINE_ALL_FROM(uint8);
DEFINE_ALL_FROM(uint16);
DEFINE_ALL_FROM(uint32);
DEFINE_ALL_FROM(uint64);
DEFINE_ALL_FROM(int8);
DEFINE_ALL_FROM(int16);
DEFINE_ALL_FROM(int32);
DEFINE_ALL_FROM(int64);
DEFINE_ALL_FROM(double);
DEFINE_ALL_FROM(std::complex<double>);
#endif

// Instantiates CastFunctor with `out_type` as the first type argument and each
// listed type, up to float / std::complex<float>, as the second.
#define DEFINE_ALL_TO_FLOAT(out_type) \
  DEFINE(out_type, bool);             \
  DEFINE(out_type, uint8);            \
  DEFINE(out_type, uint16);           \
  DEFINE(out_type, uint32);           \
  DEFINE(out_type, uint64);           \
  DEFINE(out_type, int8);             \
  DEFINE(out_type, int16);            \
  DEFINE(out_type, int32);            \
  DEFINE(out_type, int64);            \
  DEFINE(out_type, Eigen::half);      \
  DEFINE(out_type, float);            \
  DEFINE(out_type, std::complex<float>)

// Same as DEFINE_ALL_TO_FLOAT, but the list of second type arguments stops at
// Eigen::half.
#define DEFINE_ALL_TO_HALF(out_type) \
  DEFINE(out_type, bool);            \
  DEFINE(out_type, uint8);           \
  DEFINE(out_type, uint16);          \
  DEFINE(out_type, uint32);          \
  DEFINE(out_type, uint64);          \
  DEFINE(out_type, int8);            \
  DEFINE(out_type, int16);           \
  DEFINE(out_type, int32);           \
  DEFINE(out_type, int64);           \
  DEFINE(out_type, Eigen::half)

DEFINE_ALL_TO_HALF(bfloat16);
DEFINE_ALL_TO_FLOAT(std::complex<float>);

#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)

// The subset of types which are not yet supported by the MLIR generated
// kernels. The cast from Eigen::half is still needed for
// depthwise_conv_grad_op.cc. The cast from float to float is still needed for
// resize_bilinear_op.cc.
#define DEFINE_SUBSET_TO_FLOAT(out_type) \
  DEFINE(out_type, uint8);               \
  DEFINE(out_type, uint16);              \
  DEFINE(out_type, uint32);              \
  DEFINE(out_type, uint64);              \
  DEFINE(out_type, Eigen::half);         \
  DEFINE(out_type, float);               \
  DEFINE(out_type, std::complex<float>)

// The subset of types which are not yet supported by the MLIR generated
// kernels.
// No trailing semicolon on the last entry, matching the other DEFINE_* macros;
// otherwise the use site below would expand to a stray empty declaration.
#define DEFINE_SUBSET_TO_HALF(out_type) \
  DEFINE(out_type, uint8);              \
  DEFINE(out_type, uint16);             \
  DEFINE(out_type, uint32);             \
  DEFINE(out_type, uint64)

DEFINE_SUBSET_TO_HALF(Eigen::half);
DEFINE_SUBSET_TO_FLOAT(float);

#undef DEFINE_SUBSET_TO_FLOAT
#undef DEFINE_SUBSET_TO_HALF

#else
DEFINE_ALL_TO_HALF(Eigen::half);
DEFINE_ALL_TO_FLOAT(float);
#endif

#undef DEFINE_ALL_TO_FLOAT
#undef DEFINE_ALL_TO_HALF
#undef DEFINE_ALL_FROM
#undef DEFINE

}  // end namespace functor
}  // end namespace tensorflow

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM