1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // See docs in ../ops/nn_ops.cc. 17 18 #define EIGEN_USE_THREADS 19 20 #include "tensorflow/core/kernels/relu_op.h" 21 22 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 23 #include "tensorflow/core/framework/numeric_op.h" 24 #include "tensorflow/core/framework/op_kernel.h" 25 #include "tensorflow/core/framework/register_types.h" 26 #include "tensorflow/core/framework/tensor.h" 27 #include "tensorflow/core/lib/core/errors.h" 28 29 namespace tensorflow { 30 31 typedef Eigen::ThreadPoolDevice CPUDevice; 32 typedef Eigen::GpuDevice GPUDevice; 33 34 #define REGISTER_RELU_KERNELS(type) \ 35 REGISTER_KERNEL_BUILDER( \ 36 Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 37 ReluOp<CPUDevice, type>); \ 38 REGISTER_KERNEL_BUILDER( \ 39 Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 40 ReluGradOp<CPUDevice, type>); \ 41 REGISTER_KERNEL_BUILDER( \ 42 Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 43 Relu6Op<CPUDevice, type>); \ 44 REGISTER_KERNEL_BUILDER( \ 45 Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 46 Relu6GradOp<CPUDevice, type>) \ 47 REGISTER_KERNEL_BUILDER( \ 48 Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 49 LeakyReluOp<CPUDevice, type>); \ 50 REGISTER_KERNEL_BUILDER( \ 51 Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 52 LeakyReluGradOp<CPUDevice, type>); 53 54 TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS); 55 #undef REGISTER_RELU_KERNELS 56 57 #define REGISTER_ELU_KERNELS(type) \ 58 REGISTER_KERNEL_BUILDER( \ 59 Name("Elu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 60 EluOp<CPUDevice, type>); \ 61 REGISTER_KERNEL_BUILDER( \ 62 Name("EluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 63 EluGradOp<CPUDevice, type>); \ 64 REGISTER_KERNEL_BUILDER( \ 65 Name("Selu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 66 SeluOp<CPUDevice, type>); \ 67 REGISTER_KERNEL_BUILDER( \ 68 Name("SeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 69 SeluGradOp<CPUDevice, type>) 70 71 // Elu and Selu only make sense with float or double. 72 TF_CALL_FLOAT_TYPES(REGISTER_ELU_KERNELS); 73 #undef REGISTER_ELU_KERNELS 74 75 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM 76 77 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) 78 79 namespace functor { 80 #define DECLARE_GPU_NO_MLIR_SPEC(T) \ 81 template <> \ 82 void Relu<GPUDevice, T>::operator()( \ 83 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 84 typename TTypes<T>::Tensor activations); \ 85 extern template struct Relu<GPUDevice, T>; \ 86 \ 87 template <> \ 88 void Elu<GPUDevice, T>::operator()(const GPUDevice& d, \ 89 typename TTypes<T>::ConstTensor features, \ 90 typename TTypes<T>::Tensor activations); \ 91 extern template struct Elu<GPUDevice, T>; \ 92 \ 93 template <> \ 94 void Selu<GPUDevice, T>::operator()( \ 95 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 96 typename TTypes<T>::Tensor activations); \ 97 extern template struct Selu<GPUDevice, T>; 98 99 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_NO_MLIR_SPEC); 100 } // namespace functor 101 102 #define REGISTER_GPU_NO_MLIR_KERNELS(type) \ 103 REGISTER_KERNEL_BUILDER( \ 104 Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 105 ReluOp<GPUDevice, type>); \ 106 REGISTER_KERNEL_BUILDER( \ 107 Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 108 EluOp<GPUDevice, type>); \ 109 REGISTER_KERNEL_BUILDER( \ 110 Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 111 SeluOp<GPUDevice, type>); 112 113 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_NO_MLIR_KERNELS); 114 #undef REGISTER_RELU_KERNEL 115 #endif 116 117 // Forward declarations of the functor specializations for GPU. 118 namespace functor { 119 #define DECLARE_GPU_SPEC(T) \ 120 template <> \ 121 void ReluGrad<GPUDevice, T>::operator()( \ 122 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 123 typename TTypes<T>::ConstTensor features, \ 124 typename TTypes<T>::Tensor backprops); \ 125 extern template struct ReluGrad<GPUDevice, T>; \ 126 \ 127 template <> \ 128 void Relu6<GPUDevice, T>::operator()( \ 129 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 130 typename TTypes<T>::Tensor activations); \ 131 extern template struct Relu6<GPUDevice, T>; \ 132 \ 133 template <> \ 134 void Relu6Grad<GPUDevice, T>::operator()( \ 135 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 136 typename TTypes<T>::ConstTensor features, \ 137 typename TTypes<T>::Tensor backprops); \ 138 extern template struct Relu6Grad<GPUDevice, T>; \ 139 \ 140 template <> \ 141 void LeakyRelu<GPUDevice, T>::operator()(LeakyReluArgs args); \ 142 extern template struct LeakyRelu<GPUDevice, T>; \ 143 \ 144 template <> \ 145 void LeakyReluGrad<GPUDevice, T>::operator()( \ 146 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 147 typename TTypes<T>::ConstTensor features, T alpha, \ 148 typename TTypes<T>::Tensor backprops); \ 149 extern template struct LeakyReluGrad<GPUDevice, T>; \ 150 \ 151 template <> \ 152 void EluGrad<GPUDevice, T>::operator()( \ 153 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 154 typename TTypes<T>::ConstTensor activations, \ 155 typename TTypes<T>::Tensor backprops); \ 156 extern template struct EluGrad<GPUDevice, T>; \ 157 \ 158 template <> \ 159 void SeluGrad<GPUDevice, T>::operator()( \ 160 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 161 typename TTypes<T>::ConstTensor activations, \ 162 typename TTypes<T>::Tensor backprops); \ 163 extern template struct SeluGrad<GPUDevice, T>; 164 165 template <> 166 void Relu<GPUDevice, qint8>::operator()( 167 const GPUDevice& d, typename TTypes<qint8>::ConstTensor features, 168 typename TTypes<qint8>::Tensor activations); 169 extern template struct Relu<GPUDevice, qint8>; 170 171 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); 172 } // namespace functor 173 174 // Registration of the GPU implementations. 175 #define REGISTER_GPU_KERNELS(type) \ 176 REGISTER_KERNEL_BUILDER( \ 177 Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 178 ReluGradOp<GPUDevice, type>); \ 179 REGISTER_KERNEL_BUILDER( \ 180 Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 181 Relu6Op<GPUDevice, type>); \ 182 REGISTER_KERNEL_BUILDER( \ 183 Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 184 Relu6GradOp<GPUDevice, type>); \ 185 REGISTER_KERNEL_BUILDER( \ 186 Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 187 LeakyReluOp<GPUDevice, type>); \ 188 REGISTER_KERNEL_BUILDER( \ 189 Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 190 LeakyReluGradOp<GPUDevice, type>); \ 191 REGISTER_KERNEL_BUILDER( \ 192 Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 193 EluGradOp<GPUDevice, type>); \ 194 REGISTER_KERNEL_BUILDER( \ 195 Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 196 SeluGradOp<GPUDevice, type>) 197 198 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); 199 #undef REGISTER_GPU_KERNELS 200 201 template <typename Device> 202 class ReluOp<Device, qint8> 203 : public UnaryElementWiseOp<qint8, ReluOp<Device, qint8>> { 204 public: 205 using UnaryElementWiseOp<qint8, ReluOp<Device, qint8>>::UnaryElementWiseOp; 206 Operate(OpKernelContext * context,const Tensor & input,Tensor * output)207 void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { 208 auto flat_input = input.flat<qint8>(); 209 OP_REQUIRES(context, (flat_input.size() % 4) == 0, 210 errors::InvalidArgument( 211 "Tensor size must be a multiple of 4 for Relu<qint8>. Got ", 212 flat_input.size())); 213 functor::Relu<Device, qint8> func; 214 func(context->eigen_device<Device>(), flat_input, output->flat<qint8>()); 215 } 216 }; 217 218 REGISTER_KERNEL_BUILDER( 219 Name("Relu").Device(DEVICE_GPU).TypeConstraint<qint8>("T"), 220 ReluOp<GPUDevice, qint8>); 221 222 #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM 223 224 } // namespace tensorflow 225