• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // See docs in ../ops/nn_ops.cc.
17 
18 #define EIGEN_USE_THREADS
19 
20 #include "tensorflow/core/kernels/relu_op.h"
21 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
22 #include "tensorflow/core/framework/numeric_op.h"
23 #include "tensorflow/core/framework/op_kernel.h"
24 #include "tensorflow/core/framework/register_types.h"
25 #include "tensorflow/core/framework/tensor.h"
26 #include "tensorflow/core/lib/core/errors.h"
27 
28 namespace tensorflow {
29 
30 typedef Eigen::ThreadPoolDevice CPUDevice;
31 typedef Eigen::GpuDevice GPUDevice;
32 #ifdef TENSORFLOW_USE_SYCL
33 typedef Eigen::SyclDevice SYCLDevice;
34 #endif  // TENSORFLOW_USE_SYCL
35 
36 #define REGISTER_RELU_KERNELS(type)                                       \
37   REGISTER_KERNEL_BUILDER(                                                \
38       Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"),          \
39       ReluOp<CPUDevice, type>);                                           \
40   REGISTER_KERNEL_BUILDER(                                                \
41       Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),      \
42       ReluGradOp<CPUDevice, type>);                                       \
43   REGISTER_KERNEL_BUILDER(                                                \
44       Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"),         \
45       Relu6Op<CPUDevice, type>);                                          \
46   REGISTER_KERNEL_BUILDER(                                                \
47       Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
48       Relu6GradOp<CPUDevice, type>)                                       \
49   REGISTER_KERNEL_BUILDER(                                                \
50       Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
51       LeakyReluOp<CPUDevice, type>);                                      \
52   REGISTER_KERNEL_BUILDER(                                                \
53       Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
54       LeakyReluGradOp<CPUDevice, type>);
55 
56 TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS);
57 #undef REGISTER_RELU_KERNELS
58 
59 #define REGISTER_ELU_KERNELS(type)                                   \
60   REGISTER_KERNEL_BUILDER(                                           \
61       Name("Elu").Device(DEVICE_CPU).TypeConstraint<type>("T"),      \
62       EluOp<CPUDevice, type>);                                       \
63   REGISTER_KERNEL_BUILDER(                                           \
64       Name("EluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),  \
65       EluGradOp<CPUDevice, type>);                                   \
66   REGISTER_KERNEL_BUILDER(                                           \
67       Name("Selu").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
68       SeluOp<CPUDevice, type>);                                      \
69   REGISTER_KERNEL_BUILDER(                                           \
70       Name("SeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
71       SeluGradOp<CPUDevice, type>)
72 
73 // Elu and Selu only make sense with float or double.
74 TF_CALL_GPU_NUMBER_TYPES(REGISTER_ELU_KERNELS);
75 #undef REGISTER_ELU_KERNELS
76 
77 #if GOOGLE_CUDA
78 // Forward declarations of the functor specializations for GPU.
79 namespace functor {
80 #define DECLARE_GPU_SPEC(T)                                                    \
81   template <>                                                                  \
82   void Relu<GPUDevice, T>::operator()(                                         \
83       const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
84       typename TTypes<T>::Tensor activations);                                 \
85   extern template struct Relu<GPUDevice, T>;                                   \
86                                                                                \
87   template <>                                                                  \
88   void ReluGrad<GPUDevice, T>::operator()(                                     \
89       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
90       typename TTypes<T>::ConstTensor features,                                \
91       typename TTypes<T>::Tensor backprops);                                   \
92   extern template struct ReluGrad<GPUDevice, T>;                               \
93                                                                                \
94   template <>                                                                  \
95   void Relu6<GPUDevice, T>::operator()(                                        \
96       const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
97       typename TTypes<T>::Tensor activations);                                 \
98   extern template struct Relu6<GPUDevice, T>;                                  \
99                                                                                \
100   template <>                                                                  \
101   void Relu6Grad<GPUDevice, T>::operator()(                                    \
102       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
103       typename TTypes<T>::ConstTensor features,                                \
104       typename TTypes<T>::Tensor backprops);                                   \
105   extern template struct Relu6Grad<GPUDevice, T>;                              \
106                                                                                \
107   template <>                                                                  \
108   void LeakyRelu<GPUDevice, T>::operator()(                                    \
109       const GPUDevice& d, typename TTypes<T>::ConstTensor features, T alpha,   \
110       typename TTypes<T>::Tensor activations);                                 \
111   extern template struct LeakyRelu<GPUDevice, T>;                              \
112                                                                                \
113   template <>                                                                  \
114   void LeakyReluGrad<GPUDevice, T>::operator()(                                \
115       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
116       typename TTypes<T>::ConstTensor features, T alpha,                       \
117       typename TTypes<T>::Tensor backprops);                                   \
118   extern template struct LeakyReluGrad<GPUDevice, T>;                          \
119                                                                                \
120   template <>                                                                  \
121   void Elu<GPUDevice, T>::operator()(const GPUDevice& d,                       \
122                                      typename TTypes<T>::ConstTensor features, \
123                                      typename TTypes<T>::Tensor activations);  \
124   extern template struct Elu<GPUDevice, T>;                                    \
125                                                                                \
126   template <>                                                                  \
127   void EluGrad<GPUDevice, T>::operator()(                                      \
128       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
129       typename TTypes<T>::ConstTensor activations,                             \
130       typename TTypes<T>::Tensor backprops);                                   \
131   extern template struct EluGrad<GPUDevice, T>;                                \
132                                                                                \
133   template <>                                                                  \
134   void Selu<GPUDevice, T>::operator()(                                         \
135       const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
136       typename TTypes<T>::Tensor activations);                                 \
137   extern template struct Selu<GPUDevice, T>;                                   \
138                                                                                \
139   template <>                                                                  \
140   void SeluGrad<GPUDevice, T>::operator()(                                     \
141       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,           \
142       typename TTypes<T>::ConstTensor activations,                             \
143       typename TTypes<T>::Tensor backprops);                                   \
144   extern template struct SeluGrad<GPUDevice, T>;
145 
146 template <>
147 void Relu<GPUDevice, qint8>::operator()(
148     const GPUDevice& d, typename TTypes<qint8>::ConstTensor features,
149     typename TTypes<qint8>::Tensor activations);
150 extern template struct Relu<GPUDevice, qint8>;
151 
152 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
153 }  // namespace functor
154 
155 // Registration of the GPU implementations.
156 #define REGISTER_GPU_KERNELS(type)                                        \
157   REGISTER_KERNEL_BUILDER(                                                \
158       Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"),          \
159       ReluOp<GPUDevice, type>);                                           \
160   REGISTER_KERNEL_BUILDER(                                                \
161       Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
162       ReluGradOp<GPUDevice, type>);                                       \
163   REGISTER_KERNEL_BUILDER(                                                \
164       Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"),         \
165       Relu6Op<GPUDevice, type>);                                          \
166   REGISTER_KERNEL_BUILDER(                                                \
167       Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
168       Relu6GradOp<GPUDevice, type>);                                      \
169   REGISTER_KERNEL_BUILDER(                                                \
170       Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
171       LeakyReluOp<GPUDevice, type>);                                      \
172   REGISTER_KERNEL_BUILDER(                                                \
173       Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
174       LeakyReluGradOp<GPUDevice, type>);                                  \
175   REGISTER_KERNEL_BUILDER(                                                \
176       Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"),           \
177       EluOp<GPUDevice, type>);                                            \
178   REGISTER_KERNEL_BUILDER(                                                \
179       Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),       \
180       EluGradOp<GPUDevice, type>);                                        \
181   REGISTER_KERNEL_BUILDER(                                                \
182       Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"),          \
183       SeluOp<GPUDevice, type>);                                           \
184   REGISTER_KERNEL_BUILDER(                                                \
185       Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),      \
186       SeluGradOp<GPUDevice, type>)
187 
188 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
189 #undef REGISTER_GPU_KERNELS
190 
191 template <typename Device>
192 class ReluOp<Device, qint8>
193     : public UnaryElementWiseOp<qint8, ReluOp<Device, qint8>> {
194  public:
195   using UnaryElementWiseOp<qint8, ReluOp<Device, qint8>>::UnaryElementWiseOp;
196 
Operate(OpKernelContext * context,const Tensor & input,Tensor * output)197   void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
198     auto flat_input = input.flat<qint8>();
199     OP_REQUIRES(context, (flat_input.size() % 4) == 0,
200                 errors::InvalidArgument(
201                     "Tensor size must be a multiple of 4 for Relu<qint8>. Got ",
202                     flat_input.size()));
203     functor::Relu<Device, qint8> func;
204     func(context->eigen_device<Device>(), flat_input, output->flat<qint8>());
205   }
206 };
207 
208 REGISTER_KERNEL_BUILDER(
209     Name("Relu").Device(DEVICE_GPU).TypeConstraint<qint8>("T"),
210     ReluOp<GPUDevice, qint8>);
211 
212 #endif  // GOOGLE_CUDA
213 
214 #ifdef TENSORFLOW_USE_SYCL
215 // Registration of the GPU implementations.
216 #define REGISTER_SYCL_KERNELS(type)                                        \
217   REGISTER_KERNEL_BUILDER(                                                 \
218       Name("Relu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),          \
219       ReluOp<SYCLDevice, type>);                                           \
220   REGISTER_KERNEL_BUILDER(                                                 \
221       Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
222       ReluGradOp<SYCLDevice, type>);                                       \
223   REGISTER_KERNEL_BUILDER(                                                 \
224       Name("Relu6").Device(DEVICE_SYCL).TypeConstraint<type>("T"),         \
225       Relu6Op<SYCLDevice, type>);                                          \
226   REGISTER_KERNEL_BUILDER(                                                 \
227       Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
228       Relu6GradOp<SYCLDevice, type>);                                      \
229   REGISTER_KERNEL_BUILDER(                                                 \
230       Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
231       LeakyReluOp<SYCLDevice, type>);                                      \
232   REGISTER_KERNEL_BUILDER(                                                 \
233       Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
234       LeakyReluGradOp<SYCLDevice, type>);                                  \
235   REGISTER_KERNEL_BUILDER(                                                 \
236       Name("Elu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),           \
237       EluOp<SYCLDevice, type>);                                            \
238   REGISTER_KERNEL_BUILDER(                                                 \
239       Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),       \
240       EluGradOp<SYCLDevice, type>);                                        \
241   REGISTER_KERNEL_BUILDER(                                                 \
242       Name("Selu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),          \
243       SeluOp<SYCLDevice, type>);                                           \
244   REGISTER_KERNEL_BUILDER(                                                 \
245       Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
246       SeluGradOp<SYCLDevice, type>)
247 
248 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS);
249 #undef REGISTER_SYCL_KERNELS
250 #endif  // TENSORFLOW_USE_SYCL
251 
252 }  // namespace tensorflow
253