1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // See docs in ../ops/nn_ops.cc. 17 18 #define EIGEN_USE_THREADS 19 20 #include "tensorflow/core/kernels/relu_op.h" 21 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 22 #include "tensorflow/core/framework/numeric_op.h" 23 #include "tensorflow/core/framework/op_kernel.h" 24 #include "tensorflow/core/framework/register_types.h" 25 #include "tensorflow/core/framework/tensor.h" 26 #include "tensorflow/core/lib/core/errors.h" 27 28 namespace tensorflow { 29 30 typedef Eigen::ThreadPoolDevice CPUDevice; 31 typedef Eigen::GpuDevice GPUDevice; 32 #ifdef TENSORFLOW_USE_SYCL 33 typedef Eigen::SyclDevice SYCLDevice; 34 #endif // TENSORFLOW_USE_SYCL 35 36 #define REGISTER_RELU_KERNELS(type) \ 37 REGISTER_KERNEL_BUILDER( \ 38 Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 39 ReluOp<CPUDevice, type>); \ 40 REGISTER_KERNEL_BUILDER( \ 41 Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 42 ReluGradOp<CPUDevice, type>); \ 43 REGISTER_KERNEL_BUILDER( \ 44 Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 45 Relu6Op<CPUDevice, type>); \ 46 REGISTER_KERNEL_BUILDER( \ 47 Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 48 Relu6GradOp<CPUDevice, type>) \ 49 REGISTER_KERNEL_BUILDER( \ 50 
Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 51 LeakyReluOp<CPUDevice, type>); \ 52 REGISTER_KERNEL_BUILDER( \ 53 Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 54 LeakyReluGradOp<CPUDevice, type>); 55 56 TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS); 57 #undef REGISTER_RELU_KERNELS 58 59 #define REGISTER_ELU_KERNELS(type) \ 60 REGISTER_KERNEL_BUILDER( \ 61 Name("Elu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 62 EluOp<CPUDevice, type>); \ 63 REGISTER_KERNEL_BUILDER( \ 64 Name("EluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 65 EluGradOp<CPUDevice, type>); \ 66 REGISTER_KERNEL_BUILDER( \ 67 Name("Selu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 68 SeluOp<CPUDevice, type>); \ 69 REGISTER_KERNEL_BUILDER( \ 70 Name("SeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 71 SeluGradOp<CPUDevice, type>) 72 73 // Elu and Selu only make sense with float or double. 74 TF_CALL_GPU_NUMBER_TYPES(REGISTER_ELU_KERNELS); 75 #undef REGISTER_ELU_KERNELS 76 77 #if GOOGLE_CUDA 78 // Forward declarations of the functor specializations for GPU. 
79 namespace functor { 80 #define DECLARE_GPU_SPEC(T) \ 81 template <> \ 82 void Relu<GPUDevice, T>::operator()( \ 83 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 84 typename TTypes<T>::Tensor activations); \ 85 extern template struct Relu<GPUDevice, T>; \ 86 \ 87 template <> \ 88 void ReluGrad<GPUDevice, T>::operator()( \ 89 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 90 typename TTypes<T>::ConstTensor features, \ 91 typename TTypes<T>::Tensor backprops); \ 92 extern template struct ReluGrad<GPUDevice, T>; \ 93 \ 94 template <> \ 95 void Relu6<GPUDevice, T>::operator()( \ 96 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 97 typename TTypes<T>::Tensor activations); \ 98 extern template struct Relu6<GPUDevice, T>; \ 99 \ 100 template <> \ 101 void Relu6Grad<GPUDevice, T>::operator()( \ 102 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 103 typename TTypes<T>::ConstTensor features, \ 104 typename TTypes<T>::Tensor backprops); \ 105 extern template struct Relu6Grad<GPUDevice, T>; \ 106 \ 107 template <> \ 108 void LeakyRelu<GPUDevice, T>::operator()( \ 109 const GPUDevice& d, typename TTypes<T>::ConstTensor features, T alpha, \ 110 typename TTypes<T>::Tensor activations); \ 111 extern template struct LeakyRelu<GPUDevice, T>; \ 112 \ 113 template <> \ 114 void LeakyReluGrad<GPUDevice, T>::operator()( \ 115 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 116 typename TTypes<T>::ConstTensor features, T alpha, \ 117 typename TTypes<T>::Tensor backprops); \ 118 extern template struct LeakyReluGrad<GPUDevice, T>; \ 119 \ 120 template <> \ 121 void Elu<GPUDevice, T>::operator()(const GPUDevice& d, \ 122 typename TTypes<T>::ConstTensor features, \ 123 typename TTypes<T>::Tensor activations); \ 124 extern template struct Elu<GPUDevice, T>; \ 125 \ 126 template <> \ 127 void EluGrad<GPUDevice, T>::operator()( \ 128 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 129 
typename TTypes<T>::ConstTensor activations, \ 130 typename TTypes<T>::Tensor backprops); \ 131 extern template struct EluGrad<GPUDevice, T>; \ 132 \ 133 template <> \ 134 void Selu<GPUDevice, T>::operator()( \ 135 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 136 typename TTypes<T>::Tensor activations); \ 137 extern template struct Selu<GPUDevice, T>; \ 138 \ 139 template <> \ 140 void SeluGrad<GPUDevice, T>::operator()( \ 141 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 142 typename TTypes<T>::ConstTensor activations, \ 143 typename TTypes<T>::Tensor backprops); \ 144 extern template struct SeluGrad<GPUDevice, T>; 145 146 template <> 147 void Relu<GPUDevice, qint8>::operator()( 148 const GPUDevice& d, typename TTypes<qint8>::ConstTensor features, 149 typename TTypes<qint8>::Tensor activations); 150 extern template struct Relu<GPUDevice, qint8>; 151 152 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); 153 } // namespace functor 154 155 // Registration of the GPU implementations. 
156 #define REGISTER_GPU_KERNELS(type) \ 157 REGISTER_KERNEL_BUILDER( \ 158 Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 159 ReluOp<GPUDevice, type>); \ 160 REGISTER_KERNEL_BUILDER( \ 161 Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 162 ReluGradOp<GPUDevice, type>); \ 163 REGISTER_KERNEL_BUILDER( \ 164 Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 165 Relu6Op<GPUDevice, type>); \ 166 REGISTER_KERNEL_BUILDER( \ 167 Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 168 Relu6GradOp<GPUDevice, type>); \ 169 REGISTER_KERNEL_BUILDER( \ 170 Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 171 LeakyReluOp<GPUDevice, type>); \ 172 REGISTER_KERNEL_BUILDER( \ 173 Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 174 LeakyReluGradOp<GPUDevice, type>); \ 175 REGISTER_KERNEL_BUILDER( \ 176 Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 177 EluOp<GPUDevice, type>); \ 178 REGISTER_KERNEL_BUILDER( \ 179 Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 180 EluGradOp<GPUDevice, type>); \ 181 REGISTER_KERNEL_BUILDER( \ 182 Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 183 SeluOp<GPUDevice, type>); \ 184 REGISTER_KERNEL_BUILDER( \ 185 Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 186 SeluGradOp<GPUDevice, type>) 187 188 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); 189 #undef REGISTER_GPU_KERNELS 190 191 template <typename Device> 192 class ReluOp<Device, qint8> 193 : public UnaryElementWiseOp<qint8, ReluOp<Device, qint8>> { 194 public: 195 using UnaryElementWiseOp<qint8, ReluOp<Device, qint8>>::UnaryElementWiseOp; 196 Operate(OpKernelContext * context,const Tensor & input,Tensor * output)197 void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { 198 auto flat_input = input.flat<qint8>(); 199 OP_REQUIRES(context, (flat_input.size() % 4) == 0, 200 errors::InvalidArgument( 201 "Tensor size must be a 
multiple of 4 for Relu<qint8>. Got ", 202 flat_input.size())); 203 functor::Relu<Device, qint8> func; 204 func(context->eigen_device<Device>(), flat_input, output->flat<qint8>()); 205 } 206 }; 207 208 REGISTER_KERNEL_BUILDER( 209 Name("Relu").Device(DEVICE_GPU).TypeConstraint<qint8>("T"), 210 ReluOp<GPUDevice, qint8>); 211 212 #endif // GOOGLE_CUDA 213 214 #ifdef TENSORFLOW_USE_SYCL 215 // Registration of the GPU implementations. 216 #define REGISTER_SYCL_KERNELS(type) \ 217 REGISTER_KERNEL_BUILDER( \ 218 Name("Relu").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 219 ReluOp<SYCLDevice, type>); \ 220 REGISTER_KERNEL_BUILDER( \ 221 Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 222 ReluGradOp<SYCLDevice, type>); \ 223 REGISTER_KERNEL_BUILDER( \ 224 Name("Relu6").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 225 Relu6Op<SYCLDevice, type>); \ 226 REGISTER_KERNEL_BUILDER( \ 227 Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 228 Relu6GradOp<SYCLDevice, type>); \ 229 REGISTER_KERNEL_BUILDER( \ 230 Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 231 LeakyReluOp<SYCLDevice, type>); \ 232 REGISTER_KERNEL_BUILDER( \ 233 Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 234 LeakyReluGradOp<SYCLDevice, type>); \ 235 REGISTER_KERNEL_BUILDER( \ 236 Name("Elu").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 237 EluOp<SYCLDevice, type>); \ 238 REGISTER_KERNEL_BUILDER( \ 239 Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 240 EluGradOp<SYCLDevice, type>); \ 241 REGISTER_KERNEL_BUILDER( \ 242 Name("Selu").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 243 SeluOp<SYCLDevice, type>); \ 244 REGISTER_KERNEL_BUILDER( \ 245 Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \ 246 SeluGradOp<SYCLDevice, type>) 247 248 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS); 249 #undef REGISTER_SYCL_KERNELS 250 #endif // TENSORFLOW_USE_SYCL 251 252 } // namespace tensorflow 