/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/graph/graph_node_util.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"

namespace tensorflow {

namespace {

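// Descriptive note (added for clarity, inferred from the function body below):
// returns a copy of `ctx->def()` with the potentially large "value" attr
// removed, so the NodeDef retained by the base OpKernel does not hold a second
// copy of the constant's tensor data. Only the attrs that
// `OpKernel::OpKernel()` actually needs (here, "dtype") are kept.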
NodeDef StripTensorDataFromNodeDef(OpKernelConstruction* ctx) {
  const NodeDef& original = ctx->def();
  if (std::is_base_of<protobuf::Message, NodeDef>()) {
    DCHECK_EQ(reinterpret_cast<const protobuf::Message*>(&original)
                  ->GetDescriptor()
                  ->field_count(),
              7)
        << "The NodeDef format has changed, and the attr-stripping code may "
           "need to be updated.";
  }
  NodeDef ret;
  ret.set_name(original.name());
  ret.set_op(original.op());
  ret.set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), &ret);
  MergeDebugInfo(original, &ret);
  if (original.has_experimental_type()) {
    *ret.mutable_experimental_type() = original.experimental_type();
  }
  return ret;
}

}  // namespace

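// Descriptive note (added for clarity, inferred from the code below):
// ConstantOp materializes the tensor stored in the "value" attr once, at
// kernel construction time, and then emits that same cached tensor on every
// call to Compute().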
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx), false),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  profiler::ScopedMemoryDebugAnnotation op_annotation(name_view().data());
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_TPU_SYSTEM), ConstantOp);

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, qint16);
REGISTER_KERNEL(GPU, quint16);
REGISTER_KERNEL(GPU, uint32);
REGISTER_KERNEL(GPU, qint32);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, uint64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

#define REGISTER_DEFAULT_KERNEL(TYPE)                                     \
  REGISTER_KERNEL_BUILDER(                                                \
      Name("Const").Device(DEVICE_DEFAULT).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
TF_CALL_QUANTIZED_TYPES(REGISTER_DEFAULT_KERNEL);
TF_CALL_qint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_quint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
#undef REGISTER_DEFAULT_KERNEL

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

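// Descriptive note (added for clarity, inferred from the code below):
// FillOp produces a tensor of shape `dims` in which every element is a copy
// of the scalar `value` input. For example (illustrative values, not from
// this file): dims = [2, 3], value = 9  ==>  [[9, 9, 9], [9, 9, 9]].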
template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of scalars to represent shape.
        (TensorShapeUtils::IsVector(Tdims.shape()) ||
         TensorShapeUtils::IsScalar(Tdims.shape())),
        errors::InvalidArgument("dims must represent a vector, got shape ",
                                Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of length-1 vector to represent
        // scalar.
        TensorShapeUtils::IsScalar(Tvalue.shape()) ||
            (TensorShapeUtils::IsVector(Tvalue.shape()) &&
             Tvalue.shape().dim_size(0) == 1),
        errors::InvalidArgument("value must represent a scalar, got shape ",
                                Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

#define REGISTER_KERNEL(D, TYPE)                                     \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int32>("index_type")   \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int32>);           \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int64_t>("index_type") \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int64>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
REGISTER_KERNEL(CPU, qint8);
REGISTER_KERNEL(CPU, qint16);
REGISTER_KERNEL(CPU, qint32);
#undef REGISTER_CPU_KERNEL

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings on GPU

// A special DEVICE_DEFAULT kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

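// Descriptive note (added for clarity, inferred from the code below):
// ZerosLikeOp emits a tensor of zeros with the same shape and dtype as its
// input, e.g. (illustrative values): [[1, 2], [3, 4]] ==> [[0, 0], [0, 0]].
// For DT_VARIANT scalars it instead dispatches to the registered
// ZEROS_LIKE_VARIANT_UNARY_OP handler for the wrapped object.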
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // DT_VARIANT tensors must be allocated on CPU since they wrap C++
      // objects which can not be efficiently represented in GPU memory.
      int numa_node = ctx->device()->NumaNode();
      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif

REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(Variant, GPU);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#undef REGISTER_KERNEL

REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);

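// Descriptive note (added for clarity, inferred from the code below):
// OnesLikeOp emits a tensor of ones with the same shape and dtype as its
// input, e.g. (illustrative values): [[1, 2], [3, 4]] ==> [[1, 1], [1, 1]].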
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#undef REGISTER_KERNEL

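// Descriptive note (added for clarity, inferred from the code below):
// PlaceholderOp never produces a value itself; executing it always fails with
// an InvalidArgument error reminding the caller to feed a value for the
// placeholder tensor.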
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU/Default kernel registrations handle the case where a
// placeholder is created in a GPU device context while soft placement is
// disabled. Since a placeholder should never be executed, adding these
// kernels has no effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
}  // namespace tensorflow