/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/graph/graph_node_util.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"

namespace tensorflow {

namespace {

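// Returns a copy of `ctx->def()` with the "value" attr stripped out, so that
// the OpKernel base class does not hold a second copy of the (potentially
// large) tensor data; only the attrs needed by the OpKernel constructor are
// kept.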
NodeDef StripTensorDataFromNodeDef(OpKernelConstruction* ctx) {
  const NodeDef& original = ctx->def();
  if (std::is_base_of<protobuf::Message, NodeDef>()) {
    DCHECK_EQ(reinterpret_cast<const protobuf::Message*>(&original)
                  ->GetDescriptor()
                  ->field_count(),
              7)
        << "The NodeDef format has changed, and the attr-stripping code may "
           "need to be updated.";
  }
  NodeDef ret;
  ret.set_name(original.name());
  ret.set_op(original.op());
  ret.set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), &ret);
  MergeDebugInfo(original, &ret);
  if (original.has_experimental_type()) {
    *ret.mutable_experimental_type() = original.experimental_type();
  }
  return ret;
}

}  // namespace

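// ConstantOp parses the tensor held in the "value" attr at construction time
// and re-emits it on every call to Compute(). The stripped NodeDef is passed
// to the OpKernel base class so the kernel does not retain the tensor proto
// twice.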
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx), false),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  profiler::ScopedMemoryDebugAnnotation op_annotation(name_view().data());
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_TPU_SYSTEM), ConstantOp);

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, qint16);
REGISTER_KERNEL(GPU, quint16);
REGISTER_KERNEL(GPU, uint32);
REGISTER_KERNEL(GPU, qint32);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, uint64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

#define REGISTER_DEFAULT_KERNEL(TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                                 \
      Name("Const").Device(DEVICE_DEFAULT).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
TF_CALL_QUANTIZED_TYPES(REGISTER_DEFAULT_KERNEL);
TF_CALL_qint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_quint16(REGISTER_DEFAULT_KERNEL);
TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
#undef REGISTER_DEFAULT_KERNEL

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of scalars to represent shape.
        (TensorShapeUtils::IsVector(Tdims.shape()) ||
         TensorShapeUtils::IsScalar(Tdims.shape())),
        errors::InvalidArgument("dims must represent a vector, got shape ",
                                Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(
        context,
        // TODO(rmlarsen): Disallow legacy use of length-1 vector to represent
        // scalar.
        TensorShapeUtils::IsScalar(Tvalue.shape()) ||
            (TensorShapeUtils::IsVector(Tvalue.shape()) &&
             Tvalue.shape().dim_size(0) == 1),
        errors::InvalidArgument("value must represent a scalar, got shape ",
                                Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

#define REGISTER_KERNEL(D, TYPE)                                     \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int32>("index_type")   \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int32>);           \
  REGISTER_KERNEL_BUILDER(Name("Fill")                               \
                              .Device(DEVICE_##D)                    \
                              .TypeConstraint<TYPE>("T")             \
                              .TypeConstraint<int64_t>("index_type") \
                              .HostMemory("dims"),                   \
                          FillOp<D##Device, TYPE, int64>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
REGISTER_KERNEL(CPU, qint8);
REGISTER_KERNEL(CPU, qint16);
REGISTER_KERNEL(CPU, qint32);
#undef REGISTER_CPU_KERNEL

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64_t);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings on GPU

// A special DEVICE_DEFAULT kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

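// ZerosLikeOp produces a tensor of zeros with the same shape and dtype as its
// input. Scalar DT_VARIANT inputs are dispatched through the variant unary-op
// registry; all other types use SetZeroFunctor on a (possibly forwarded)
// output buffer.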
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // DT_VARIANT tensors must be allocated on CPU since they wrap C++
      // objects which can not be efficiently represented in GPU memory.
      int numa_node = ctx->device()->NumaNode();
      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif

REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(Variant, GPU);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#undef REGISTER_KERNEL

REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);

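// OnesLikeOp produces a tensor of ones with the same shape and dtype as its
// input, using SetOneFunctor on a (possibly forwarded) output buffer.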
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(int64_t, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
#endif
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_DEFAULT)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#undef REGISTER_KERNEL

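// PlaceholderOp must never actually run: a placeholder's value is expected to
// be fed at graph execution time, so Compute() always fails with an
// InvalidArgument error naming the tensor, its dtype, and (if known) its
// expected shape.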
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU/Default kernel registration is used to address the
// situation that a placeholder is added in a GPU device context and soft
// placement is false. Since a placeholder should never be executed, adding
// these GPU kernels has no effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_DEFAULT),
                        PlaceholderOp);
}  // namespace tensorflow