/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant.h"
#include "tensorflow/core/framework/variant_encode_decode.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"

namespace tensorflow {

namespace {

class DeserializeSparseOp : public OpKernel {
 public:
  explicit DeserializeSparseOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);

    OP_REQUIRES(
        context, input.dims() > 0,
        errors::InvalidArgument("Serialized sparse should have non-zero rank ",
                                input.shape().DebugString()));
    OP_REQUIRES(context, input.shape().dim_size(input.dims() - 1) == 3,
                errors::InvalidArgument(
                    "Serialized sparse should have 3 as the last dimension ",
                    input.shape().DebugString()));

    // `input_dims_to_stack` is the number of dimensions that will be added to
    // each of the elements before they are concatenated into the output.
    const int64 input_dims_to_stack = input.dims() - 1;
    int num_sparse_tensors = 1;
    for (int i = 0; i < input_dims_to_stack; ++i) {
      num_sparse_tensors *= input.shape().dim_size(i);
    }

    if (num_sparse_tensors == 1 && input_dims_to_stack == 0) {
      // Special case with a single sparse tensor and no dimensions to add to
      // the output indices. We can return the boxed tensors directly (after
      // validating them).
      const Tensor* output_indices;
      const Tensor* output_values;
      const Tensor* output_shape;
      const auto& input_as_vec = input.vec<Variant>();
      int64 total_non_zeros;
      OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
                                  input_as_vec(1), input_as_vec(2), 0,
                                  &output_shape, &total_non_zeros));
      OP_REQUIRES_OK(context, GetAndValidateSparseTensorIndicesAndValues(
                                  input_as_vec(0), input_as_vec(1), 0,
                                  output_shape->NumElements(), &output_indices,
                                  &output_values));
      context->set_output(0, *output_indices);
      context->set_output(1, *output_values);
      context->set_output(2, *output_shape);
      return;
    }

    OP_REQUIRES(
        context, num_sparse_tensors > 0,
        errors::InvalidArgument(
            "Serialized sparse should have at least 1 serialized tensor, "
            "but has a zero dimension ",
            input.shape().DebugString()));

    const auto& input_as_matrix = input.flat_inner_dims<Variant, 2>();

    // Compute the output "dense shape" of, and the number of non-zero
    // elements in, the stacked sparse tensors. Given an input of shape
    // (S_0, ..., S_{input_dims_to_stack-1}, 3), and an element of dense shape
    // (E_0, ..., E_n), the output dense shape will be
    // (S_0, ..., S_{input_dims_to_stack-1}, E_0, ..., E_n).
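    // For example: an input of shape (4, 3) holding serialized SparseTensors
    // whose elements have dense shape (10) stacks to an output dense shape of
    // (4, 10); an input of shape (2, 3, 3) whose elements have dense shape
    // (5, 6) stacks to (2, 3, 5, 6).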
    Tensor* output_shape;
    int64 total_non_zeros = 0;

    // Allocate and build the initial output shape based on the element shape
    // of the 0th sparse tensor in the input.
    //
    // NOTE(mrry): We define `element_shape` as a `const Tensor*` rather than a
    // `Tensor` to avoid the overhead of allocating and deallocating a `Tensor`
    // on the stack. While the per-`Tensor` cost is small, this op can unbox a
    // large number of tensors (3 per batch element) and these fixed overheads
    // dominate when the number of non-zeros per element is small.
    const Tensor* element_shape;
    OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
                                input_as_matrix(0, 1), input_as_matrix(0, 2), 0,
                                &element_shape, &total_non_zeros));
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       2, {input_dims_to_stack + element_shape->NumElements()},
                       &output_shape));
    const auto element_shape_vec = element_shape->vec<int64>();
    auto output_shape_vec = output_shape->vec<int64>();
    output_shape_vec(0) = num_sparse_tensors;
    for (int64 j = 0; j < input_dims_to_stack; ++j) {
      output_shape_vec(j) = input.dim_size(j);
    }
    for (int64 j = 0; j < element_shape->NumElements(); ++j) {
      output_shape_vec(j + input_dims_to_stack) = element_shape_vec(j);
    }

    // Accumulate the number of non-zero elements from the remaining sparse
    // tensors, and validate that they have compatible dense shapes.
    //
    // NOTE(mrry): For compatibility with the implementation of
    // DeserializeManySparse, and with the many ops that generate SparseTensors
    // to batch without a fixed dense_shape (e.g. `tf.parse_single_example()`),
    // we compute the maximum in each dimension to find the smallest
    // dense_shape that bounds all of the input SparseTensors.
    for (int i = 1; i < num_sparse_tensors; ++i) {
      int64 num_non_zeros;
      OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
                                  input_as_matrix(i, 1), input_as_matrix(i, 2),
                                  i, &element_shape, &num_non_zeros));
      total_non_zeros += num_non_zeros;
      OP_REQUIRES(
          context,
          output_shape->NumElements() - input_dims_to_stack ==
              element_shape->NumElements(),
          errors::InvalidArgument(
              "Inconsistent shape across SparseTensors: rank prior to "
              "SparseTensor[",
              i, "] was: ", output_shape->NumElements() - input_dims_to_stack,
              " but rank of SparseTensor[", i,
              "] is: ", element_shape->NumElements()));
      const auto element_shape_vec = element_shape->vec<int64>();
      for (int j = 0; j < element_shape->NumElements(); ++j) {
        output_shape_vec(j + input_dims_to_stack) = std::max(
            output_shape_vec(j + input_dims_to_stack), element_shape_vec(j));
      }
    }
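    // For example: if one input SparseTensor has dense shape [3, 5] and
    // another has dense shape [4, 2], the element dimensions of the output
    // dense shape become [4, 5], the smallest shape that bounds both.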
    // Compute the output "indices" matrix and "values" vector.
    Tensor* output_indices;
    Tensor* output_values;

    const int output_rank = output_shape->NumElements();
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       0, {static_cast<int64>(total_non_zeros), output_rank},
                       &output_indices));
    OP_REQUIRES_OK(
        context, context->allocate_output(
                     1, {static_cast<int64>(total_non_zeros)}, &output_values));

    // The bulk of the work in this method involves building the output indices
    // in a tight loop. For cache friendliness, we generate the indices in the
    // order that they will be laid out in memory. We use raw pointers instead
    // of Eigen element/slice indexing methods, to access the underlying index
    // buffer to minimize the amount of work in that tight loop.
    int64* output_indices_data = output_indices->matrix<int64>().data();
    size_t current_row = 0;

    for (int i = 0; i < num_sparse_tensors; ++i) {
      const Tensor* element_indices;
      const Tensor* element_values;
      OP_REQUIRES_OK(context, this->GetAndValidateSparseTensorIndicesAndValues(
                                  input_as_matrix(i, 0), input_as_matrix(i, 1),
                                  i, output_rank - input_dims_to_stack,
                                  &element_indices, &element_values));

      const size_t num_index_rows = element_values->NumElements();

      // An empty sparse tensor in the input will generate no data
      // in the output. We short-circuit the rest of the iteration to avoid
      // triggering assertions in Eigen when manipulating empty tensors (or
      // slices of tensors).
      if (num_index_rows == 0) continue;

      const size_t start_row = current_row;
      const size_t next_start_row = current_row + num_index_rows;

      // NOTE(mrry): If the element is a scalar SparseTensor,
      // `element_indices` will be an empty tensor, and this pointer will not
      // be valid. However, we will not dereference the pointer in that case,
      // because `input_dims_to_stack == output_rank`.
      const int64* element_indices_data =
          element_indices->matrix<int64>().data();

      // Build the submatrix of `output_indices` for the i^th sparse tensor
      // in the input.
      //
      // Each row of `output_indices` comprises `input_dims_to_stack` indices
      // based on the position of the i^th sparse tensor in the input tensor,
      // followed by the indices from the corresponding row in
      // `element_indices`.
      if (input_dims_to_stack == 1 && output_rank == 2) {
        // We specialize this case because the compiler can generate
        // more efficient code when the number of indices for each element is
        // known statically. Since the most common use of this op is to
        // deserialize batches of SparseTensors, and the most common source of
        // SparseTensors is the `tf.parse_single_example()` op, which generates
        // 1-D SparseTensors, we statically unroll the loop for the rank 2
        // output case.
        for (; current_row < next_start_row; ++current_row) {
          *output_indices_data++ = i;
          *output_indices_data++ = *element_indices_data++;
        }
      } else {
        // `sparse_tensor_index` is the tuple of indices that correspond to
        // mapping the flat element index (`i`) back onto the stacked
        // coordinates implied by the position of the i^th sparse tensor in the
        // input tensor.
        //
        // We build `sparse_tensor_index` in reverse (innermost/minor dimension
        // to outermost/major dimension). The `cumulative_product` represents
        // the size of the inner subtensor for which `sparse_tensor_index` has
        // already been built.
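        // For example: with input.shape() == [2, 3, 3] (so the stacked
        // dimensions are [2, 3]) and i == 4, this yields
        // sparse_tensor_index == {1, 1}, because 4 == 1 * 3 + 1.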
        gtl::InlinedVector<int64, 4> sparse_tensor_index(input_dims_to_stack);
        int cumulative_product = 1;
        for (size_t j = 0; j < sparse_tensor_index.size(); ++j) {
          size_t reverse_index = sparse_tensor_index.size() - j - 1;
          sparse_tensor_index[reverse_index] =
              (i / cumulative_product) % input.dim_size(reverse_index);
          cumulative_product *= input.dim_size(reverse_index);
        }
        for (; current_row < next_start_row; ++current_row) {
          for (int64 sparse_tensor_index_component : sparse_tensor_index) {
            *output_indices_data++ = sparse_tensor_index_component;
          }
          for (size_t k = input_dims_to_stack; k < output_rank; ++k) {
            *output_indices_data++ = *element_indices_data++;
          }
        }
      }

      // Build the subvector of `output_values` for the i^th sparse tensor
      // in the input.
      //
      // NOTE(mrry): There is a potential optimization here where we use a T*
      // to represent the current position in `output_values`, but it would
      // require some rejigging of the template parameters.
      // NOTE(mrry): Another potential optimization: if we know that this
      // operation consumes its input, we could std::move non-primitive
      // elements into the output and avoid a copy.
      Eigen::DSizes<Eigen::DenseIndex, 1> values_start(start_row);
      Eigen::DSizes<Eigen::DenseIndex, 1> values_sizes(num_index_rows);

#define HANDLE_TYPE(T)                                             \
  case DataTypeToEnum<T>::value: {                                 \
    output_values->vec<T>().slice(values_start, values_sizes) =    \
        element_values->vec<T>();                                  \
    break;                                                         \
  }
      switch (dtype_) {
        TF_CALL_ALL_TYPES(HANDLE_TYPE);
        TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
#undef HANDLE_TYPE
        default:
          OP_REQUIRES_OK(
              context, errors::Unimplemented(
                           "DeserializeSparse Unhandled data type: ", dtype_));
      }
    }
  }

 private:
  Status GetAndValidateSparseTensorShape(const Variant& serialized_values,
                                         const Variant& serialized_shape,
                                         int index,
                                         const Tensor** output_shape,
                                         int64* output_num_non_zeros) {
    // Deserialize and validate the shape.
    *output_shape = serialized_shape.get<Tensor>();
    if (*output_shape == nullptr) {
      return errors::InvalidArgument(
          "Could not get a tensor from serialized_sparse[", index, ", 2]");
    }
    if ((*output_shape)->dtype() != DT_INT64) {
      return errors::InvalidArgument(
          "Expected serialized_sparse[", index,
          ", 2] to be a vector of DT_INT64 but received dtype ",
          DataTypeString((*output_shape)->dtype()));
    }
    if (!TensorShapeUtils::IsVector((*output_shape)->shape())) {
      return errors::InvalidArgument(
          "Expected serialized_sparse[", index,
          ", 2] to be a shape vector but its shape is ",
          (*output_shape)->shape().DebugString());
    }
    // Deserialize the values to count the non-zeros, checking that the
    // variant actually holds a tensor before dereferencing it.
    const Tensor* serialized_values_tensor = serialized_values.get<Tensor>();
    if (serialized_values_tensor == nullptr) {
      return errors::InvalidArgument(
          "Could not get a tensor from serialized_sparse[", index, ", 1]");
    }
    *output_num_non_zeros = serialized_values_tensor->NumElements();
    return Status::OK();
  }

  Status GetAndValidateSparseTensorIndicesAndValues(
      const Variant& serialized_indices, const Variant& serialized_values,
      int index, int expected_rank, const Tensor** output_indices,
      const Tensor** output_values) {
    // Deserialize and validate the indices.
    *output_indices = serialized_indices.get<Tensor>();
    if (*output_indices == nullptr) {
      return errors::InvalidArgument(
          "Could not get a tensor from serialized_sparse[", index, ", 0]");
    }
    if ((*output_indices)->dtype() != DT_INT64) {
      return errors::InvalidArgument(
          "Expected serialized_sparse[", index,
          ", 0] to be a matrix of DT_INT64 but received dtype ",
          DataTypeString((*output_indices)->dtype()));
    }
    if (!TensorShapeUtils::IsMatrix((*output_indices)->shape())) {
      return errors::InvalidArgument(
          "Expected serialized_sparse[", index,
          ", 0] to represent an index matrix but received shape ",
          (*output_indices)->shape().DebugString());
    }
    int64 num_entries = (*output_indices)->dim_size(0);
    int rank = (*output_indices)->dim_size(1);
    if (rank != expected_rank) {
      return errors::InvalidArgument(
          "Expected column counts of SparseTensor[", index,
          "].indices to match size of SparseTensor[", index,
          "].shape but they do not: ", rank, " vs. ", expected_rank);
    }

    // Deserialize and validate the values.
    *output_values = serialized_values.get<Tensor>();
    if (*output_values == nullptr) {
      return errors::InvalidArgument(
          "Could not get a tensor from serialized_sparse[", index, ", 1]");
    }
    if (!TensorShapeUtils::IsVector((*output_values)->shape())) {
      return errors::InvalidArgument(
          "Expected serialized_sparse[", index,
          ", 1] to represent a values vector but received shape ",
          (*output_values)->shape().DebugString());
    }
    if (dtype_ != (*output_values)->dtype()) {
      return errors::InvalidArgument(
          "Requested SparseTensor of type ", DataTypeString(dtype_),
          " but SparseTensor[", index,
          "].values.dtype() == ", DataTypeString((*output_values)->dtype()));
    }
    if (num_entries != (*output_values)->dim_size(0)) {
      return errors::InvalidArgument(
          "Expected row counts of SparseTensor[", index,
          "].indices and SparseTensor[", index,
          "].values to match but they do not: ", num_entries, " vs. ",
          (*output_values)->dim_size(0));
    }

    return Status::OK();
  }

  DataType dtype_;
};

REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")
                            .Device(DEVICE_CPU)
                            .TypeConstraint<Variant>("Tserialized"),
                        DeserializeSparseOp);

}  // namespace

}  // namespace tensorflow