• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // See docs in ../ops/array_ops.cc.
17 
18 #include "tensorflow/core/kernels/shape_ops.h"
19 #include "tensorflow/core/framework/node_def.pb.h"
20 #include "tensorflow/core/framework/register_types.h"
21 
22 namespace tensorflow {
23 
24 // Shape ----------------------------------------
25 REGISTER_KERNEL_BUILDER(Name("Shape")
26                             .Device(DEVICE_CPU)
27                             .HostMemory("output")
28                             .TypeConstraint<int32>("out_type"),
29                         ShapeOp<int32>);
30 REGISTER_KERNEL_BUILDER(Name("Shape")
31                             .Device(DEVICE_CPU)
32                             .HostMemory("output")
33                             .TypeConstraint<int64>("out_type"),
34                         ShapeOp<int64>);
35 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU kernels for every "T" except int32. Only "output" is declared as host
// memory, and both supported "out_type" values are registered per "T".
#define REGISTER_GPU_SHAPE_AS(type, out_t)                       \
  REGISTER_KERNEL_BUILDER(Name("Shape")                          \
                              .Device(DEVICE_GPU)                \
                              .TypeConstraint<out_t>("out_type") \
                              .TypeConstraint<type>("T")         \
                              .HostMemory("output"),             \
                          ShapeOp<out_t>)
#define REGISTER_GPU_SHAPE(type)      \
  REGISTER_GPU_SHAPE_AS(type, int32); \
  REGISTER_GPU_SHAPE_AS(type, int64);

TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_SHAPE);
TF_CALL_bool(REGISTER_GPU_SHAPE);
TF_CALL_variant(REGISTER_GPU_SHAPE);
#undef REGISTER_GPU_SHAPE
#undef REGISTER_GPU_SHAPE_AS

// int32 on GPU is registered separately, with "input" also in host memory.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
#define REGISTER_GPU_SHAPE_INT32(out_t)                          \
  REGISTER_KERNEL_BUILDER(Name("Shape")                          \
                              .Device(DEVICE_GPU)                \
                              .TypeConstraint<int32>("T")        \
                              .TypeConstraint<out_t>("out_type") \
                              .HostMemory("input")               \
                              .HostMemory("output"),             \
                          ShapeOp<out_t>)
REGISTER_GPU_SHAPE_INT32(int32);
REGISTER_GPU_SHAPE_INT32(int64);
#undef REGISTER_GPU_SHAPE_INT32

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
75 
76 #define REGISTER_DEFAULT_KERNEL(type)                            \
77   REGISTER_KERNEL_BUILDER(Name("Shape")                          \
78                               .Device(DEVICE_DEFAULT)            \
79                               .HostMemory("output")              \
80                               .TypeConstraint<int32>("out_type") \
81                               .TypeConstraint<type>("T"),        \
82                           ShapeOp<int32>);                       \
83   REGISTER_KERNEL_BUILDER(Name("Shape")                          \
84                               .Device(DEVICE_DEFAULT)            \
85                               .HostMemory("output")              \
86                               .TypeConstraint<int64>("out_type") \
87                               .TypeConstraint<type>("T"),        \
88                           ShapeOp<int64>);
89 
90 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
91 TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
92 TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
93 #undef REGISTER_DEFAULT_KERNEL
94 
95 // A special GPU kernel for int32.
96 // TODO(b/25387198): Also enable int32 in device memory. This kernel
97 // registration requires all int32 inputs and outputs to be in host memory.
98 REGISTER_KERNEL_BUILDER(Name("Shape")
99                             .Device(DEVICE_DEFAULT)
100                             .HostMemory("input")
101                             .HostMemory("output")
102                             .TypeConstraint<int32>("T")
103                             .TypeConstraint<int32>("out_type"),
104                         ShapeOp<int32>);
105 REGISTER_KERNEL_BUILDER(Name("Shape")
106                             .Device(DEVICE_DEFAULT)
107                             .HostMemory("input")
108                             .HostMemory("output")
109                             .TypeConstraint<int32>("T")
110                             .TypeConstraint<int64>("out_type"),
111                         ShapeOp<int64>);
112 
113 // ShapeN ---------------------------------------
114 REGISTER_KERNEL_BUILDER(Name("ShapeN")
115                             .Device(DEVICE_CPU)
116                             .HostMemory("output")
117                             .TypeConstraint<int32>("out_type"),
118                         ShapeNOp<int32>);
119 REGISTER_KERNEL_BUILDER(Name("ShapeN")
120                             .Device(DEVICE_CPU)
121                             .HostMemory("output")
122                             .TypeConstraint<int64>("out_type"),
123                         ShapeNOp<int64>);
124 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU kernels for the number types (int32 excluded) and bool. Only "output"
// is declared as host memory; both "out_type" values are registered per "T".
#define REGISTER_GPU_SHAPE_N_AS(type, out_t)                     \
  REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
                              .Device(DEVICE_GPU)                \
                              .TypeConstraint<out_t>("out_type") \
                              .TypeConstraint<type>("T")         \
                              .HostMemory("output"),             \
                          ShapeNOp<out_t>)
#define REGISTER_GPU_SHAPE_N(type)      \
  REGISTER_GPU_SHAPE_N_AS(type, int32); \
  REGISTER_GPU_SHAPE_N_AS(type, int64)

TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_SHAPE_N);
TF_CALL_bool(REGISTER_GPU_SHAPE_N);
#undef REGISTER_GPU_SHAPE_N
#undef REGISTER_GPU_SHAPE_N_AS

// int32 on GPU is registered separately, with "input" also in host memory.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
#define REGISTER_GPU_SHAPE_N_INT32(out_t)                        \
  REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
                              .Device(DEVICE_GPU)                \
                              .TypeConstraint<int32>("T")        \
                              .TypeConstraint<out_t>("out_type") \
                              .HostMemory("input")               \
                              .HostMemory("output"),             \
                          ShapeNOp<out_t>)
REGISTER_GPU_SHAPE_N_INT32(int32);
REGISTER_GPU_SHAPE_N_INT32(int64);
#undef REGISTER_GPU_SHAPE_N_INT32
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
162 
163 #define REGISTER_DEFAULT_KERNEL(type)                            \
164   REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
165                               .Device(DEVICE_DEFAULT)            \
166                               .HostMemory("output")              \
167                               .TypeConstraint<int32>("out_type") \
168                               .TypeConstraint<type>("T"),        \
169                           ShapeNOp<int32>);                      \
170   REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
171                               .Device(DEVICE_DEFAULT)            \
172                               .HostMemory("output")              \
173                               .TypeConstraint<int64>("out_type") \
174                               .TypeConstraint<type>("T"),        \
175                           ShapeNOp<int64>)
176 
177 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
178 TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
179 #undef REGISTER_DEFAULT_KERNEL
180 
181 // A special GPU kernel for int32.
182 // TODO(b/25387198): Also enable int32 in device memory. This kernel
183 // registration requires all int32 inputs and outputs to be in host memory.
184 REGISTER_KERNEL_BUILDER(Name("ShapeN")
185                             .Device(DEVICE_DEFAULT)
186                             .HostMemory("input")
187                             .HostMemory("output")
188                             .TypeConstraint<int32>("T")
189                             .TypeConstraint<int32>("out_type"),
190                         ShapeNOp<int32>);
191 REGISTER_KERNEL_BUILDER(Name("ShapeN")
192                             .Device(DEVICE_DEFAULT)
193                             .HostMemory("input")
194                             .HostMemory("output")
195                             .TypeConstraint<int32>("T")
196                             .TypeConstraint<int64>("out_type"),
197                         ShapeNOp<int64>);
198 
199 // Rank ------------------------------------------
200 REGISTER_KERNEL_BUILDER(Name("Rank").Device(DEVICE_CPU).HostMemory("output"),
201                         RankOp);
202 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU kernels for the number types (int32 excluded) and variant; only
// "output" is declared as host memory.
#define REGISTER_GPU_RANK(type)                           \
  REGISTER_KERNEL_BUILDER(Name("Rank")                    \
                              .Device(DEVICE_GPU)         \
                              .HostMemory("output")       \
                              .TypeConstraint<type>("T"), \
                          RankOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_RANK);
TF_CALL_variant(REGISTER_GPU_RANK);
#undef REGISTER_GPU_RANK

// int32 and bool on GPU are registered separately, with "input" also in host
// memory.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
#define REGISTER_GPU_RANK_HOST(type)                      \
  REGISTER_KERNEL_BUILDER(Name("Rank")                    \
                              .Device(DEVICE_GPU)         \
                              .HostMemory("input")        \
                              .HostMemory("output")       \
                              .TypeConstraint<type>("T"), \
                          RankOp)
REGISTER_GPU_RANK_HOST(int32);

REGISTER_GPU_RANK_HOST(bool);
#undef REGISTER_GPU_RANK_HOST

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
232 
233 #define REGISTER_DEFAULT_KERNEL(type)                    \
234   REGISTER_KERNEL_BUILDER(Name("Rank")                   \
235                               .Device(DEVICE_DEFAULT)    \
236                               .TypeConstraint<type>("T") \
237                               .HostMemory("output"),     \
238                           RankOp);
239 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
240 TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
241 #undef REGISTER_DEFAULT_KERNEL
242 
243 // A special GPU kernel for int32 and bool.
244 // TODO(b/25387198): Also enable int32 in device memory. This kernel
245 // registration requires all int32 inputs and outputs to be in host memory.
246 REGISTER_KERNEL_BUILDER(Name("Rank")
247                             .Device(DEVICE_DEFAULT)
248                             .TypeConstraint<int32>("T")
249                             .HostMemory("input")
250                             .HostMemory("output"),
251                         RankOp);
252 
253 REGISTER_KERNEL_BUILDER(Name("Rank")
254                             .Device(DEVICE_DEFAULT)
255                             .TypeConstraint<bool>("T")
256                             .HostMemory("input")
257                             .HostMemory("output"),
258                         RankOp);
259 
260 // Size ------------------------------------------
261 REGISTER_KERNEL_BUILDER(Name("Size")
262                             .Device(DEVICE_CPU)
263                             .HostMemory("output")
264                             .TypeConstraint<int32>("out_type"),
265                         SizeOp<int32>);
266 REGISTER_KERNEL_BUILDER(Name("Size")
267                             .Device(DEVICE_CPU)
268                             .HostMemory("output")
269                             .TypeConstraint<int64>("out_type"),
270                         SizeOp<int64>);
271 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU kernels for every "T" except int32. Only "output" is declared as host
// memory, and both supported "out_type" values are registered per "T".
#define REGISTER_GPU_SIZE_AS(type, out_t)                         \
  REGISTER_KERNEL_BUILDER(Name("Size")                            \
                              .Device(DEVICE_GPU)                 \
                              .HostMemory("output")               \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<out_t>("out_type"), \
                          SizeOp<out_t>)
#define REGISTER_GPU_SIZE(type)      \
  REGISTER_GPU_SIZE_AS(type, int32); \
  REGISTER_GPU_SIZE_AS(type, int64);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_SIZE);
TF_CALL_bool(REGISTER_GPU_SIZE);
TF_CALL_variant(REGISTER_GPU_SIZE);
#undef REGISTER_GPU_SIZE
#undef REGISTER_GPU_SIZE_AS

// int32 on GPU is registered separately, with "input" also in host memory.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
#define REGISTER_GPU_SIZE_INT32(out_t)                            \
  REGISTER_KERNEL_BUILDER(Name("Size")                            \
                              .Device(DEVICE_GPU)                 \
                              .HostMemory("input")                \
                              .HostMemory("output")               \
                              .TypeConstraint<int32>("T")         \
                              .TypeConstraint<out_t>("out_type"), \
                          SizeOp<out_t>)
REGISTER_GPU_SIZE_INT32(int32);
REGISTER_GPU_SIZE_INT32(int64);
#undef REGISTER_GPU_SIZE_INT32

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
310 
311 #define REGISTER_DEFAULT_KERNEL(type)                            \
312   REGISTER_KERNEL_BUILDER(Name("Size")                           \
313                               .Device(DEVICE_DEFAULT)            \
314                               .TypeConstraint<type>("T")         \
315                               .TypeConstraint<int32>("out_type") \
316                               .HostMemory("output"),             \
317                           SizeOp<int32>);                        \
318   REGISTER_KERNEL_BUILDER(Name("Size")                           \
319                               .Device(DEVICE_DEFAULT)            \
320                               .TypeConstraint<type>("T")         \
321                               .TypeConstraint<int64>("out_type") \
322                               .HostMemory("output"),             \
323                           SizeOp<int64>);
324 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
325 TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
326 TF_CALL_variant(REGISTER_DEFAULT_KERNEL);
327 #undef REGISTER_DEFAULT_KERNEL
328 
329 // A special GPU kernel for int32.
330 // TODO(b/25387198): Also enable int32 in device memory. This kernel
331 // registration requires all int32 inputs and outputs to be in host memory.
332 REGISTER_KERNEL_BUILDER(Name("Size")
333                             .Device(DEVICE_DEFAULT)
334                             .TypeConstraint<int32>("T")
335                             .TypeConstraint<int32>("out_type")
336                             .HostMemory("input")
337                             .HostMemory("output"),
338                         SizeOp<int32>);
339 REGISTER_KERNEL_BUILDER(Name("Size")
340                             .Device(DEVICE_DEFAULT)
341                             .TypeConstraint<int32>("T")
342                             .TypeConstraint<int64>("out_type")
343                             .HostMemory("input")
344                             .HostMemory("output"),
345                         SizeOp<int64>);
346 
347 // ExpandDims ------------------------------------
348 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
349                             .Device(DEVICE_CPU)
350                             .HostMemory("dim")
351                             .TypeConstraint<int32>("Tdim"),
352                         ExpandDimsOp<int32>);
353 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
354                             .Device(DEVICE_CPU)
355                             .HostMemory("dim")
356                             .TypeConstraint<int64>("Tdim"),
357                         ExpandDimsOp<int64>);
358 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU kernels for the number types (int32 excluded) and bool. Only "dim" is
// declared as host memory; both supported "Tdim" values are registered.
#define REGISTER_GPU_EXPAND_DIMS_AS(type, dim_t)              \
  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                  \
                              .Device(DEVICE_GPU)             \
                              .HostMemory("dim")              \
                              .TypeConstraint<type>("T")      \
                              .TypeConstraint<dim_t>("Tdim"), \
                          ExpandDimsOp<dim_t>)
#define REGISTER_GPU_EXPAND_DIMS(type)      \
  REGISTER_GPU_EXPAND_DIMS_AS(type, int32); \
  REGISTER_GPU_EXPAND_DIMS_AS(type, int64);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_EXPAND_DIMS);
TF_CALL_bool(REGISTER_GPU_EXPAND_DIMS);
#undef REGISTER_GPU_EXPAND_DIMS
#undef REGISTER_GPU_EXPAND_DIMS_AS

// int32 on GPU is registered separately: "input", "dim" and "output" are all
// declared as host memory.
#define REGISTER_GPU_EXPAND_DIMS_INT32(dim_t)                 \
  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                  \
                              .Device(DEVICE_GPU)             \
                              .HostMemory("input")            \
                              .HostMemory("dim")              \
                              .HostMemory("output")           \
                              .TypeConstraint<int32>("T")     \
                              .TypeConstraint<dim_t>("Tdim"), \
                          ExpandDimsOp<dim_t>)
REGISTER_GPU_EXPAND_DIMS_INT32(int32);
REGISTER_GPU_EXPAND_DIMS_INT32(int64);
#undef REGISTER_GPU_EXPAND_DIMS_INT32
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
394 
395 #define REGISTER_DEFAULT_KERNEL(type)                        \
396   REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
397                               .Device(DEVICE_DEFAULT)        \
398                               .TypeConstraint<type>("T")     \
399                               .TypeConstraint<int32>("Tdim") \
400                               .HostMemory("dim"),            \
401                           ExpandDimsOp<int32>);              \
402   REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
403                               .Device(DEVICE_DEFAULT)        \
404                               .TypeConstraint<type>("T")     \
405                               .TypeConstraint<int64>("Tdim") \
406                               .HostMemory("dim"),            \
407                           ExpandDimsOp<int64>);
408 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
409 TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
410 #undef REGISTER_DEFAULT_KERNEL
411 
412 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
413                             .Device(DEVICE_DEFAULT)
414                             .TypeConstraint<int32>("T")
415                             .TypeConstraint<int32>("Tdim")
416                             .HostMemory("input")
417                             .HostMemory("dim")
418                             .HostMemory("output"),
419                         ExpandDimsOp<int32>);
420 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
421                             .Device(DEVICE_DEFAULT)
422                             .TypeConstraint<int32>("T")
423                             .TypeConstraint<int64>("Tdim")
424                             .HostMemory("input")
425                             .HostMemory("dim")
426                             .HostMemory("output"),
427                         ExpandDimsOp<int64>);
428 
429 // Squeeze ---------------------------------------
430 REGISTER_KERNEL_BUILDER(Name("Squeeze").Device(DEVICE_CPU), SqueezeOp);
431 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU kernels for the number types (int32 excluded) and bool, keyed on "T"
// with no host-memory arguments.
#define REGISTER_GPU_SQUEEZE(type)                        \
  REGISTER_KERNEL_BUILDER(Name("Squeeze")                 \
                              .Device(DEVICE_GPU)         \
                              .TypeConstraint<type>("T"), \
                          SqueezeOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_SQUEEZE);
TF_CALL_bool(REGISTER_GPU_SQUEEZE);
#undef REGISTER_GPU_SQUEEZE

// int32 on GPU is registered separately, with "input" and "output" in host
// memory.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Squeeze")
                            .Device(DEVICE_GPU)
                            .HostMemory("input")
                            .HostMemory("output")
                            .TypeConstraint<int32>("T"),
                        SqueezeOp);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
451 
452 #define REGISTER_DEFAULT_KERNEL(type)                                   \
453   REGISTER_KERNEL_BUILDER(                                              \
454       Name("Squeeze").Device(DEVICE_DEFAULT).TypeConstraint<type>("T"), \
455       SqueezeOp);
456 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_DEFAULT_KERNEL);
457 TF_CALL_bool(REGISTER_DEFAULT_KERNEL);
458 #undef REGISTER_DEFAULT_KERNEL
459 
460 // A special GPU kernel for int32.
461 // TODO(b/25387198): Also enable int32 in device memory. This kernel
462 // registration requires all int32 inputs and outputs to be in host memory.
463 REGISTER_KERNEL_BUILDER(Name("Squeeze")
464                             .Device(DEVICE_DEFAULT)
465                             .TypeConstraint<int32>("T")
466                             .HostMemory("input")
467                             .HostMemory("output"),
468                         SqueezeOp);
469 
470 class EnsureShapeOp : public OpKernel {
471  public:
EnsureShapeOp(OpKernelConstruction * ctx)472   explicit EnsureShapeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
473     OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
474   }
475 
Compute(OpKernelContext * ctx)476   void Compute(OpKernelContext* ctx) override {
477     TensorShape shape;
478     OP_REQUIRES_OK(ctx, shape_op_helpers::GetShape(ctx, 0, &shape));
479 
480     if (!expected_shape_.IsCompatibleWith(shape)) {
481       ctx->SetStatus(errors::InvalidArgument(
482           "Shape of tensor ", this->def().input(0), " ", shape.DebugString(),
483           " is not compatible with expected shape ",
484           expected_shape_.DebugString(), "."));
485     }
486 
487     // If shape matches, outputs the tensor.
488     if (IsRefType(ctx->input_dtype(0))) {
489       ctx->forward_ref_input_to_ref_output(0, 0);
490     } else {
491       ctx->set_output(0, ctx->input(0));
492     }
493   }
494 
IsExpensive()495   bool IsExpensive() override { return false; }
496 
497  private:
498   PartialTensorShape expected_shape_;
499 };
500 
501 // NOTE(rachelim): The kernel registrations for EnsureShapeOp are identical to
502 // those of the identity op, since the ops have the same device type
503 // constraints.
504 REGISTER_KERNEL_BUILDER(Name("EnsureShape").Device(DEVICE_CPU), EnsureShapeOp);
505 
506 #define REGISTER_GPU_KERNEL(type)                                       \
507   REGISTER_KERNEL_BUILDER(                                              \
508       Name("EnsureShape").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
509       EnsureShapeOp)
510 
511 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
512 REGISTER_GPU_KERNEL(Variant);
513 
514 #undef REGISTER_GPU_KERNEL
515 
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU kernels for int32, bool, tstring and ResourceHandle, registered with
// both "input" and "output" declared as host memory.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
#define REGISTER_GPU_ENSURE_SHAPE_HOST(type)              \
  REGISTER_KERNEL_BUILDER(Name("EnsureShape")             \
                              .Device(DEVICE_GPU)         \
                              .TypeConstraint<type>("T")  \
                              .HostMemory("input")        \
                              .HostMemory("output"),      \
                          EnsureShapeOp)

REGISTER_GPU_ENSURE_SHAPE_HOST(int32);
REGISTER_GPU_ENSURE_SHAPE_HOST(bool);
REGISTER_GPU_ENSURE_SHAPE_HOST(tstring);
REGISTER_GPU_ENSURE_SHAPE_HOST(ResourceHandle);

#undef REGISTER_GPU_ENSURE_SHAPE_HOST

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
536 }  // namespace tensorflow
537