/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/graph/graph_node_util.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"

namespace tensorflow {

namespace {

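// Returns a copy of `ctx->def()` with the (potentially large) "value" attr
// stripped out, so the OpKernel does not retain a second copy of the
// constant's tensor data alongside `ConstantOp::tensor_`.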
NodeDef StripTensorDataFromNodeDef(OpKernelConstruction* ctx) {
  const NodeDef& original = ctx->def();
  if (std::is_base_of<protobuf::Message, NodeDef>()) {
    DCHECK_EQ(reinterpret_cast<const protobuf::Message*>(&original)
                  ->GetDescriptor()
                  ->field_count(),
              7)
        << "The NodeDef format has changed, and the attr-stripping code may "
           "need to be updated.";
  }
  NodeDef ret;
  ret.set_name(original.name());
  ret.set_op(original.op());
  ret.set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), &ret);
  MergeDebugInfo(original, &ret);
  if (original.has_experimental_type()) {
    *ret.mutable_experimental_type() = original.experimental_type();
  }
  return ret;
}

}  // namespace

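// ConstantOp materializes the "value" attr into `tensor_` once, at kernel
// construction time, and returns that same tensor on every Compute() call.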
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx), false),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  profiler::ScopedMemoryDebugAnnotation op_annotation(name_view().data());
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_TPU_SYSTEM), ConstantOp);

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, qint16);
REGISTER_KERNEL(GPU, quint16);
REGISTER_KERNEL(GPU, uint32);
REGISTER_KERNEL(GPU, qint32);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, uint64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

#define REGISTER_DEFAULT_KERNEL(TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                                 \
      Name("Const").Device(DEVICE_DEFAULT).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
TF_CALL_QUANTIZED_TYPES(REGISTER_DEFAULT_KERNEL);
TF_CALL_qint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_quint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
#undef REGISTER_DEFAULT_KERNEL

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

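// FillOp allocates an output tensor whose shape is given by the 1-D "dims"
// input (a host-memory vector of Index) and sets every element to the scalar
// "value" input.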
template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of scalars to represent shape.
        (TensorShapeUtils::IsVector(Tdims.shape()) ||
         TensorShapeUtils::IsScalar(Tdims.shape())),
        errors::InvalidArgument("dims must represent a vector, got shape ",
                                Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of length-1 vector to represent
        // scalar.
        TensorShapeUtils::IsScalar(Tvalue.shape()) ||
            (TensorShapeUtils::IsVector(Tvalue.shape()) &&
             Tvalue.shape().dim_size(0) == 1),
        errors::InvalidArgument("value must represent a scalar, got shape ",
                                Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

#define REGISTER_KERNEL(D, TYPE)                                     \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int32>("index_type")   \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int32>);           \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int64_t>("index_type") \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int64>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
REGISTER_KERNEL(CPU, qint8);
REGISTER_KERNEL(CPU, qint16);
REGISTER_KERNEL(CPU, qint32);
#undef REGISTER_CPU_KERNEL

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings on GPU

// A special DEVICE_DEFAULT kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

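// ZerosLikeOp emits a tensor of zeros with the same shape and dtype as its
// input. For DT_VARIANT scalars it defers to the registered
// ZEROS_LIKE_VARIANT_UNARY_OP handler instead of writing zeros directly.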
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // DT_VARIANT tensors must be allocated on CPU since they wrap C++
      // objects which can not be efficiently represented in GPU memory.
      int numa_node = ctx->device()->NumaNode();
      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif

REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(Variant, GPU);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#undef REGISTER_KERNEL

REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);

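// OnesLikeOp emits a tensor of ones with the same shape and dtype as its
// input, reusing the input buffer when it can be forwarded.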
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#undef REGISTER_KERNEL

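// PlaceholderOp never produces a value: Compute() always fails with an
// InvalidArgument error, since a placeholder must be overridden by a feed.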
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU/Default kernel registration is used to address the
// situation that a placeholder is added in a GPU device context and soft
// placement is false. Since a placeholder should never be executed, adding
// these GPU kernels has no effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
}  // namespace tensorflow