/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/sparse_fill_empty_rows_op.h"

#include <algorithm>
#include <numeric>
#include <unordered_map>
#include <utility>
#include <vector>

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"

namespace tensorflow {

using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;

namespace functor {

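// Example of the forward op's behavior (as implemented below): for a
// SparseTensor with dense_shape [4, 2], indices [[0, 0], [0, 1], [2, 0]]
// and values [a, b, c], rows 1 and 3 are empty, so `default_value` is
// inserted at [1, 0] and [3, 0]:
//   output_indices:      [[0, 0], [0, 1], [1, 0], [2, 0], [3, 0]]
//   output_values:       [a, b, default_value, c, default_value]
//   empty_row_indicator: [false, true, false, true]
//   reverse_index_map:   [0, 1, 3]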
template <typename T, typename Tindex>
struct SparseFillEmptyRows<CPUDevice, T, Tindex> {
  Status operator()(OpKernelContext* context, const Tensor& default_value_t,
                    const Tensor& indices_t, const Tensor& values_t,
                    const Tensor& dense_shape_t,
                    typename AsyncOpKernel::DoneCallback done) {
    (void)done;  // Unused (only used in GPU implementation)
    const int kOutputIndicesOutput = 0;
    const int kOutputValuesOutput = 1;
    const int kEmptyRowIndicatorOutput = 2;
    const int kReverseIndexMapOutput = 3;

    const T& default_value = default_value_t.scalar<T>()();
    const auto indices = indices_t.matrix<Tindex>();
    const auto values = values_t.vec<T>();
    const auto dense_shape = dense_shape_t.vec<Tindex>();

    const Tindex N = indices_t.shape().dim_size(0);
    const Tindex dense_rows = dense_shape(0);

    bool* empty_row_indicator = nullptr;
    if (context->output_required(kEmptyRowIndicatorOutput)) {
      Tensor* empty_row_indicator_t = nullptr;
      TF_RETURN_IF_ERROR(context->allocate_output(kEmptyRowIndicatorOutput,
                                                  TensorShape({dense_rows}),
                                                  &empty_row_indicator_t));
      empty_row_indicator = empty_row_indicator_t->vec<bool>().data();
    }
    Tindex* reverse_index_map = nullptr;
    if (context->output_required(kReverseIndexMapOutput)) {
      Tensor* reverse_index_map_t = nullptr;
      TF_RETURN_IF_ERROR(context->allocate_output(
          kReverseIndexMapOutput, TensorShape({N}), &reverse_index_map_t));
      reverse_index_map = reverse_index_map_t->vec<Tindex>().data();
    }

    int rank = indices_t.shape().dim_size(1);

    if (dense_rows == 0) {
      if (N != 0) {
        return errors::InvalidArgument(
            "Received SparseTensor with dense_shape[0] = 0 but "
            "indices.shape[0] = ",
            N);
      }
      Tensor* output_indices_t;
      TensorShape output_indices_shape({0, rank});
      TF_RETURN_IF_ERROR(context->allocate_output(
          kOutputIndicesOutput, output_indices_shape, &output_indices_t));
      Tensor* output_values_t;
      TF_RETURN_IF_ERROR(context->allocate_output(
          kOutputValuesOutput, TensorShape({0}), &output_values_t));

      // Exit early, nothing more to do.
      return Status::OK();
    }

    bool rows_are_ordered = true;
    Tindex last_indices_row = 0;
    std::vector<Tindex> csr_offset(dense_rows, 0);
    for (int i = 0; i < N; ++i) {
      const Tindex row = indices(i, 0);
      if (row < 0 || row >= dense_rows) {
        return errors::InvalidArgument("indices(", i, ", 0) is invalid: ", row,
                                       " is not in [0, ", dense_rows, ")");
      }
      ++csr_offset[row];
      rows_are_ordered = rows_are_ordered & (row >= last_indices_row);
      last_indices_row = row;
    }
    bool all_rows_full = true;
    for (int row = 0; row < dense_rows; ++row) {
      // csr_offset here describes the number of elements in this dense row
      bool row_empty = (csr_offset[row] == 0);
      if (empty_row_indicator) {
        empty_row_indicator[row] = row_empty;
      }
      all_rows_full = all_rows_full & !row_empty;
      // In filled version, each row has at least one element.
      csr_offset[row] = std::max(csr_offset[row], Tindex{1});
      // Update csr_offset to hold the cumulative number of elements in rows
      // 0..row (inclusive):
      //  csr_offset[0] == #{elements of row 0}
      //  csr_offset[1] == #{elements of row 1} + #{elements of row 0}
      //  ..
      //  csr_offset[i] == starting index for the elements of row i + 1.
      if (row > 0) {
        csr_offset[row] += csr_offset[row - 1];
      }
    }
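    // For example, with dense_rows == 4 and input rows {0, 0, 2}:
    //   per-row counts:          [2, 0, 1, 0]
    //   after clamping to >= 1:  [2, 1, 1, 1]
    //   after the running sum:   [2, 3, 4, 5]
    // so N_full == csr_offset[3] == 5, and row i's block of output elements
    // starts at csr_offset[i - 1] (or at 0 for row 0).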

    if (all_rows_full && rows_are_ordered) {
      context->set_output(kOutputIndicesOutput, indices_t);
      context->set_output(kOutputValuesOutput, values_t);
      if (reverse_index_map) {
        for (Tindex i = 0; i < N; ++i) {
          reverse_index_map[i] = i;
        }
      }
    } else {
      Tensor* output_indices_t;
      const Tindex N_full = csr_offset[dense_rows - 1];
      TensorShape output_indices_shape({N_full, rank});
      TF_RETURN_IF_ERROR(context->allocate_output(
          kOutputIndicesOutput, output_indices_shape, &output_indices_t));
      auto output_indices = output_indices_t->matrix<Tindex>();

      Tensor* output_values_t;
      TF_RETURN_IF_ERROR(context->allocate_output(
          kOutputValuesOutput, TensorShape({N_full}), &output_values_t));
      auto output_values = output_values_t->vec<T>();

      std::vector<Tindex> filled_count(dense_rows, 0);

      // Fill in values for rows that are not missing
      for (Tindex i = 0; i < N; ++i) {
        const Tindex row = indices(i, 0);
        Tindex& offset = filled_count[row];
        const Tindex output_i = ((row == 0) ? 0 : csr_offset[row - 1]) + offset;
        offset++;  // Increment the filled count for this row.
        std::copy_n(&indices(i, 0), rank, &output_indices(output_i, 0));
        output_values(output_i) = values(i);
        // We'll need this reverse index map to backprop correctly.
        if (reverse_index_map) {
          reverse_index_map[i] = output_i;
        }
      }

      // Fill in values for rows that are missing
      for (Tindex row = 0; row < dense_rows; ++row) {
        const Tindex row_count = filled_count[row];
        if (row_count == 0) {  // We haven't filled this row
          const Tindex starting_index = (row == 0) ? 0 : csr_offset[row - 1];
          // Set the row index in column 0 and zero out the remaining index
          // columns, then fill in the default value for this inserted entry.
          output_indices(starting_index, 0) = row;
          for (Tindex col = 1; col < rank; ++col) {
            output_indices(starting_index, col) = 0;
          }
          output_values(starting_index) = default_value;
        }
      }
    }

    return Status::OK();
  }
};

}  // namespace functor

namespace {

template <typename Device, typename T, typename Tindex>
void SparseFillEmptyRowsOpImpl(OpKernelContext* context,
                               AsyncOpKernel::DoneCallback done = nullptr) {
  // Note that setting this empty lambda as the default parameter value directly
  // can cause strange compiler/linker errors, so we do it like this instead.
  if (!done) {
    done = [] {};
  }

  const int kIndicesInput = 0;
  const int kValuesInput = 1;
  const int kDenseShapeInput = 2;
  const int kDefaultValueInput = 3;

  const Tensor& indices_t = context->input(kIndicesInput);
  const Tensor& values_t = context->input(kValuesInput);
  const Tensor& dense_shape_t = context->input(kDenseShapeInput);
  const Tensor& default_value_t = context->input(kDefaultValueInput);

  OP_REQUIRES_ASYNC(
      context, TensorShapeUtils::IsVector(dense_shape_t.shape()),
      errors::InvalidArgument("dense_shape must be a vector, saw: ",
                              dense_shape_t.shape().DebugString()),
      done);
  OP_REQUIRES_ASYNC(context, TensorShapeUtils::IsMatrix(indices_t.shape()),
                    errors::InvalidArgument("indices must be a matrix, saw: ",
                                            indices_t.shape().DebugString()),
                    done);
  OP_REQUIRES_ASYNC(context, TensorShapeUtils::IsVector(values_t.shape()),
                    errors::InvalidArgument("values must be a vector, saw: ",
                                            values_t.shape().DebugString()),
                    done);
  OP_REQUIRES_ASYNC(
      context, TensorShapeUtils::IsScalar(default_value_t.shape()),
      errors::InvalidArgument("default_value must be a scalar, saw: ",
                              default_value_t.shape().DebugString()),
      done);
  // TODO(ebrevdo): add shape checks between values, indices,
  // Also add check that dense rank > 0.
  OP_REQUIRES_ASYNC(context, dense_shape_t.NumElements() != 0,
                    errors::InvalidArgument("Dense shape cannot be empty."),
                    done);

  using FunctorType = functor::SparseFillEmptyRows<Device, T, Tindex>;
  OP_REQUIRES_OK_ASYNC(context,
                       FunctorType()(context, default_value_t, indices_t,
                                     values_t, dense_shape_t, done),
                       done);
}

}  // namespace

template <typename Device, typename T, typename Tindex>
class SparseFillEmptyRowsOp : public OpKernel {
 public:
  explicit SparseFillEmptyRowsOp(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    SparseFillEmptyRowsOpImpl<Device, T, Tindex>(context);
  }
};

#define REGISTER_KERNELS(D, T, Tindex)                   \
  REGISTER_KERNEL_BUILDER(Name("SparseFillEmptyRows")    \
                              .Device(DEVICE_##D)        \
                              .HostMemory("dense_shape") \
                              .TypeConstraint<T>("T"),   \
                          SparseFillEmptyRowsOp<D##Device, T, Tindex>)

#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T, int64)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS

#undef REGISTER_KERNELS

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// The GPU implementation is async because it requires waiting for a
// host->device memcpy before the output is allocated (similar to
// SegmentSumGPUOp).
template <typename T, typename Tindex>
class SparseFillEmptyRowsGPUOp : public AsyncOpKernel {
 public:
  explicit SparseFillEmptyRowsGPUOp(OpKernelConstruction* context)
      : AsyncOpKernel(context) {}

  void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
    SparseFillEmptyRowsOpImpl<GPUDevice, T, Tindex>(context, done);
  }
};

#define REGISTER_KERNELS(T, Tindex)                      \
  REGISTER_KERNEL_BUILDER(Name("SparseFillEmptyRows")    \
                              .Device(DEVICE_GPU)        \
                              .HostMemory("dense_shape") \
                              .TypeConstraint<T>("T"),   \
                          SparseFillEmptyRowsGPUOp<T, Tindex>)

// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T, Tindex)                                            \
  template <>                                                                  \
  Status SparseFillEmptyRows<GPUDevice, T, Tindex>::operator()(                \
      OpKernelContext* context, const Tensor& default_value_t,                 \
      const Tensor& indices_t, const Tensor& values_t,                         \
      const Tensor& dense_shape_t, typename AsyncOpKernel::DoneCallback done); \
  extern template struct SparseFillEmptyRows<GPUDevice, T, Tindex>;
#define DECLARE_GPU_SPEC_INT64(T) DECLARE_GPU_SPEC(T, int64)
TF_CALL_POD_TYPES(DECLARE_GPU_SPEC_INT64)
#undef DECLARE_GPU_SPEC_INT64
#undef DECLARE_GPU_SPEC
}  // namespace functor

#define REGISTER_KERNELS_TINDEX(T) REGISTER_KERNELS(T, int64)
TF_CALL_POD_TYPES(REGISTER_KERNELS_TINDEX)
#undef REGISTER_KERNELS_TINDEX

#undef REGISTER_KERNELS

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

namespace functor {

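// Example of the gradient computation (as implemented below), continuing the
// forward example above: with reverse_index_map == [0, 1, 3] and
// grad_values == [g0, g1, g2, g3, g4],
//   d_values        == [g0, g1, g3]
//   d_default_value == g2 + g4
// i.e. positions never referenced by reverse_index_map (the filled-in rows)
// contribute their gradients to the default value.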
template <typename T, typename Tindex>
struct SparseFillEmptyRowsGrad<CPUDevice, T, Tindex> {
  Status operator()(OpKernelContext* context,
                    typename TTypes<Tindex>::ConstVec reverse_index_map,
                    typename TTypes<T>::ConstVec grad_values,
                    typename TTypes<T>::Vec d_values,
                    typename TTypes<T>::Scalar d_default_value) {
    const CPUDevice& device = context->eigen_device<CPUDevice>();
    const Tindex N = reverse_index_map.dimension(0);
    const Tindex N_full = grad_values.dimension(0);

    T& d_default_value_scalar = d_default_value();
    d_default_value_scalar = T();

    Tensor visited_t;
    TF_RETURN_IF_ERROR(
        context->allocate_temp(DT_BOOL, TensorShape({N_full}), &visited_t));
    auto visited = visited_t.vec<bool>();
    visited.device(device) = visited.constant(false);

    for (int i = 0; i < N; ++i) {
      // Locate the index of the output of the forward prop associated
      // with this location in the input of the forward prop.  Copy
      // the gradient into it.  Mark it as visited.
      int64_t reverse_index = reverse_index_map(i);
      if (reverse_index < 0 || reverse_index >= N_full) {
        return errors::InvalidArgument(
            "Elements in reverse index must be in [0, ", N_full, ") but got ",
            reverse_index);
      }
      d_values(i) = grad_values(reverse_index);
      visited(reverse_index) = true;
    }
    for (int j = 0; j < N_full; ++j) {
      // The default value gradient gets the accumulated remainder of
      // the backprop values (since the default value was used to fill
      // in these slots in the forward calculation).
      if (!visited(j)) {
        d_default_value_scalar += grad_values(j);
      }
    }
    return Status::OK();
  }
};

}  // namespace functor

template <typename Device, typename T, typename Tindex>
class SparseFillEmptyRowsGradOp : public OpKernel {
 public:
  explicit SparseFillEmptyRowsGradOp(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor* reverse_index_map_t;
    const Tensor* grad_values_t;
    OP_REQUIRES_OK(context,
                   context->input("reverse_index_map", &reverse_index_map_t));
    OP_REQUIRES_OK(context, context->input("grad_values", &grad_values_t));

    OP_REQUIRES(
        context, TensorShapeUtils::IsVector(reverse_index_map_t->shape()),
        errors::InvalidArgument("reverse_index_map must be a vector, saw: ",
                                reverse_index_map_t->shape().DebugString()));
    OP_REQUIRES(context, TensorShapeUtils::IsVector(grad_values_t->shape()),
                errors::InvalidArgument("grad_values must be a vector, saw: ",
                                        grad_values_t->shape().DebugString()));

    const auto reverse_index_map = reverse_index_map_t->vec<Tindex>();
    const auto grad_values = grad_values_t->vec<T>();

    const Tindex N = reverse_index_map_t->shape().dim_size(0);

    Tensor* d_values_t;
    OP_REQUIRES_OK(context, context->allocate_output(
                                "d_values", TensorShape({N}), &d_values_t));
    auto d_values = d_values_t->vec<T>();
    Tensor* d_default_value_t;
    OP_REQUIRES_OK(context,
                   context->allocate_output("d_default_value", TensorShape({}),
                                            &d_default_value_t));
    auto d_default_value = d_default_value_t->scalar<T>();

    OP_REQUIRES_OK(context,
                   functor::SparseFillEmptyRowsGrad<Device, T, Tindex>()(
                       context, reverse_index_map, grad_values, d_values,
                       d_default_value));
  }
};

#define REGISTER_KERNELS(D, T, Tindex)                    \
  REGISTER_KERNEL_BUILDER(Name("SparseFillEmptyRowsGrad") \
                              .Device(DEVICE_##D)         \
                              .TypeConstraint<T>("T"),    \
                          SparseFillEmptyRowsGradOp<D##Device, T, Tindex>)

#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T, int64)
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T, Tindex)                                 \
  template <>                                                       \
  Status SparseFillEmptyRowsGrad<GPUDevice, T, Tindex>::operator()( \
      OpKernelContext* context,                                     \
      typename TTypes<Tindex>::ConstVec reverse_index_map,          \
      typename TTypes<T>::ConstVec grad_values,                     \
      typename TTypes<T>::Vec d_values,                             \
      typename TTypes<T>::Scalar d_default_value);                  \
  extern template struct SparseFillEmptyRowsGrad<GPUDevice, T, Tindex>;
#define DECLARE_GPU_SPEC_INT64(T) DECLARE_GPU_SPEC(T, int64)
TF_CALL_REAL_NUMBER_TYPES(DECLARE_GPU_SPEC_INT64);
#undef DECLARE_GPU_SPEC_INT64
#undef DECLARE_GPU_SPEC
}  // namespace functor

#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T, int64)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#undef REGISTER_KERNELS
}  // namespace tensorflow