/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/graph/graph_node_util.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"

namespace tensorflow {

namespace {

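// Returns a copy of `ctx->def()` with the potentially large "value" attr
// removed, so that the OpKernel base class does not retain a second copy of
// the constant's tensor data.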
NodeDef StripTensorDataFromNodeDef(OpKernelConstruction* ctx) {
  const NodeDef& original = ctx->def();
  if (std::is_base_of<protobuf::Message, NodeDef>()) {
    DCHECK_EQ(reinterpret_cast<const protobuf::Message*>(&original)
                  ->GetDescriptor()
                  ->field_count(),
              7)
        << "The NodeDef format has changed, and the attr-stripping code may "
           "need to be updated.";
  }
  NodeDef ret;
  ret.set_name(original.name());
  ret.set_op(original.op());
  ret.set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), &ret);
  MergeDebugInfo(original, &ret);
  if (original.has_experimental_type()) {
    *ret.mutable_experimental_type() = original.experimental_type();
  }
  return ret;
}

}  // namespace

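// ConstantOp materializes the "value" attr into `tensor_` once at kernel
// construction time; Compute() then re-emits that same tensor on every call.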
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx), false),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  profiler::ScopedMemoryDebugAnnotation op_annotation(name_view().data());
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_TPU_SYSTEM), ConstantOp);

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, qint16);
REGISTER_KERNEL(GPU, quint16);
REGISTER_KERNEL(GPU, uint32);
REGISTER_KERNEL(GPU, qint32);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, uint64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

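// Register Const on DEVICE_DEFAULT for number types other than int32, the
// quantized types, bool, and variant.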
#define REGISTER_DEFAULT_KERNEL(TYPE)                                     \
  REGISTER_KERNEL_BUILDER(                                                \
      Name("Const").Device(DEVICE_DEFAULT).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
TF_CALL_QUANTIZED_TYPES(REGISTER_DEFAULT_KERNEL);
TF_CALL_qint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_quint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
#undef REGISTER_DEFAULT_KERNEL

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

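// FillOp creates an output tensor with the shape given by the `dims` input and
// sets every element to the scalar `value` input, e.g. Fill([2, 3], 9) yields
// a 2x3 tensor filled with 9s.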
template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of scalars to represent shape.
        (TensorShapeUtils::IsVector(Tdims.shape()) ||
         TensorShapeUtils::IsScalar(Tdims.shape())),
        errors::InvalidArgument("dims must represent a vector, got shape ",
                                Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of length-1 vector to represent
        // scalar.
        TensorShapeUtils::IsScalar(Tvalue.shape()) ||
            (TensorShapeUtils::IsVector(Tvalue.shape()) &&
             Tvalue.shape().dim_size(0) == 1),
        errors::InvalidArgument("value must represent a scalar, got shape ",
                                Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

#define REGISTER_KERNEL(D, TYPE)                                     \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int32>("index_type")   \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int32>);           \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int64_t>("index_type") \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int64>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
REGISTER_KERNEL(CPU, qint8);
REGISTER_KERNEL(CPU, qint16);
REGISTER_KERNEL(CPU, qint32);
#undef REGISTER_CPU_KERNEL

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings on GPU

// A special DEVICE_DEFAULT kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

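// ZerosLikeOp produces a tensor of zeros with the same shape and dtype as its
// input. For DT_VARIANT inputs the zero value is computed through the variant
// unary-op registry; for all other dtypes the output buffer is simply zeroed,
// reusing the input buffer when it can be forwarded.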
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // DT_VARIANT tensors must be allocated on CPU since they wrap C++
      // objects which can not be efficiently represented in GPU memory.
      int numa_node = ctx->device()->NumaNode();
      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif

REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(Variant, GPU);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#undef REGISTER_KERNEL

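// On DEVICE_DEFAULT, int32 ZerosLike uses the CPU kernel with its output
// pinned to host memory.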
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);

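// OnesLikeOp produces a tensor of ones with the same shape and dtype as its
// input, forwarding the input buffer as the output when possible.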
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#undef REGISTER_KERNEL

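// PlaceholderOp never produces a value itself; it only exists so that a graph
// can declare an input slot. Compute() always fails with an error telling the
// caller to feed a value for the placeholder.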
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU/Default kernel registration is used to address the
// situation that a placeholder is added in a GPU device context and soft
// placement is false. Since a placeholder should never be executed, adding
// these GPU kernels has no effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
}  // namespace tensorflow