1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_ 17 #define TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_ 18 19 #include <string> 20 #include <vector> 21 22 #include "tensorflow/core/example/example.pb.h" 23 #include "tensorflow/core/example/feature.pb.h" 24 #include "tensorflow/core/framework/allocator.h" 25 #include "tensorflow/core/framework/graph.pb.h" 26 #include "tensorflow/core/framework/partial_tensor_shape.h" 27 #include "tensorflow/core/framework/tensor.h" 28 #include "tensorflow/core/framework/types.h" 29 #include "tensorflow/core/lib/core/errors.h" 30 #include "tensorflow/core/platform/types.h" 31 #include "tensorflow/core/util/sparse/sparse_tensor.h" 32 33 // This is a set of helper methods that will make it possible to share 34 // tensorflow::Example proto Tensor conversion code inside the ExampleParserOp 35 // OpKernel as well as in external code. 36 namespace tensorflow { 37 38 // "Dense" feature configuration. 39 struct FixedLenFeature { 40 string key; 41 DataType dtype; 42 TensorShape shape; 43 Tensor default_value; 44 string values_output_tensor_name; 45 }; 46 47 // "Sparse" feature configuration. 48 struct VarLenFeature { 49 string key; 50 DataType dtype; 51 string values_output_tensor_name; 52 string indices_output_tensor_name; 53 string shapes_output_tensor_name; 54 }; 55 56 // Given a single tensorflow::Example, with an optional example name 57 // at a particular index within a batch, and dense and sparse feature 58 // configurations from fixed_len_features, var_len_features, this method 59 // updates the dense value tensor and the sparse values temporary vector 60 // of tensors. The indexing of the output vectors correspond 1:1 to the 61 // indexing of the feature configuration vectors. 62 // 63 // The fixed_len_features and var_len_features maps are assume to be 64 // have disjoint key fields from the Feature map in the tensorflow.Example 65 // proto. 66 // 67 // For each sparse feature, the sparse values temporary vector holds a 68 // tensor for each Example. Each tensor is either empty or filled, depending 69 // on if the sparse feature value is set for the Example. This 70 // temporary structure is needed because we need to know the total number 71 // of filled elements in the batch to get the proper final sparse tensor 72 // shapes allocated. After the entire batch is processed, 73 // GetSparseTensorShape can be used to calculate the final shapes and 74 // CopyIntoSparseTensor can be used to copy from the temporary vector 75 // into the final allocated tensors. 76 Status SingleExampleProtoToTensors( 77 const Example& example, const string& name, const int batch_index, 78 const std::vector<FixedLenFeature>& fixed_len_features, 79 const std::vector<VarLenFeature>& var_len_features, 80 std::vector<Tensor*>* dense_values, 81 std::vector<std::vector<Tensor>>* sparse_values_temporary_vector); 82 83 // The shape of the indices and values tensors associated with a SparseTensor 84 // are dependent on the contents of the batch. 85 struct VarLenFeatureBatchShapes { 86 TensorShape indices_shape; 87 TensorShape values_shape; 88 int max_num_features; 89 }; 90 91 // Get the shape of the sparse values and indices tensors for the batch, 92 // given how many of the tensors in the temporary sparse values vector 93 // are actually filled. 94 Status GetSparseTensorShapes(const VarLenFeature& var_len_feature, 95 const std::vector<Tensor>& sparse_values_tmp, 96 const int batch_size, 97 VarLenFeatureBatchShapes* output_shapes); 98 99 // A method to convert a batch of tensorflow::Example protos into output 100 // tensors. This method is useful if there already is a batch of deserialized 101 // Example protos in memory (such as a serving use-case) and we do not wish 102 // to incur an extraneous serialize/deserialize. It is intended 103 // as an outside of OpKernel compatible replacement for the functionality of 104 // ExampleParserOp. In a serving setting, this method could be used to produce 105 // a feed_dict of Tensors that could bypass the ExampleParserOp. 106 // 107 // Note that unlike SingleExampleProtoToTensors, output tensors are 108 // allocated using a provided Allocator within this method. 109 Status BatchExampleProtoToTensors( 110 const std::vector<const Example*>& examples, 111 const std::vector<string>& names, 112 const std::vector<FixedLenFeature>& fixed_len_features, 113 const std::vector<VarLenFeature>& var_len_features, Allocator* allocator, 114 std::vector<Tensor>* output_dense_values_tensor, 115 std::vector<Tensor>* output_sparse_indices_tensor, 116 std::vector<Tensor>* output_sparse_values_tensor, 117 std::vector<Tensor>* output_sparse_shapes_tensor); 118 119 // Check that the given dtype is one that is compatible with 120 // tensorflow::Example protocol buffer feature values. 121 Status CheckValidType(const DataType& dtype); 122 123 // Check that the provided Feature proto message's oneof value 124 // matches that of the provided dtype. 125 Status CheckTypesMatch(const Feature& feature, const DataType& dtype, 126 bool* match); 127 128 // For a single Example, copy a dense feature value into an output 129 // dense value tensor Out at the provided out_index offset. 130 Status FeatureDenseCopy(const std::size_t out_index, const string& name, 131 const string& key, const DataType& dtype, 132 const TensorShape& shape, const Feature& feature, 133 Tensor* out); 134 135 // Copy the value a provided Tensor into an output dense_value tensor Out 136 // at the provided out_index offset. 137 void RowDenseCopy(const std::size_t& out_index, const DataType& dtype, 138 const Tensor& in, Tensor* out); 139 140 // For a single Example, and given sparse feature return a temporary output 141 // Tensor suitable for being collected in the temporary sparse value vector. 142 Tensor FeatureSparseCopy(const std::size_t batch, const string& key, 143 const DataType& dtype, const Feature& feature); 144 145 // Copy a temporary Tensor into the final sparse indices and values 146 // tensor at a given batch index and element offset. This method 147 // assumes that the indices/values Tensors have been properly allocated 148 // for the batch. 149 int64 CopyIntoSparseTensor(const Tensor& in, const int batch, 150 const int64 offset, Tensor* indices, Tensor* values); 151 152 // Parses the attributes passed to ParseExample. 153 // REQUIRES: Init must be called after construction. 154 class ParseExampleAttrs { 155 public: 156 template <typename ContextType> Init(ContextType * ctx)157 Status Init(ContextType* ctx) { 158 TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types)); 159 TF_RETURN_IF_ERROR(ctx->GetAttr("Ndense", &num_dense)); 160 TF_RETURN_IF_ERROR(ctx->GetAttr("Nsparse", &num_sparse)); 161 TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types)); 162 TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes)); 163 // Temporary check until we start allowing a variable length outer 164 // dimension. 165 for (int i = 0; i < dense_shapes.size(); ++i) { 166 bool shape_ok = true; 167 if (dense_shapes[i].dims() == -1) { 168 shape_ok = false; 169 } else { 170 for (int d = 1; d < dense_shapes[i].dims(); ++d) { 171 if (dense_shapes[i].dim_size(d) == -1) { 172 shape_ok = false; 173 } 174 } 175 } 176 if (!shape_ok) { 177 return errors::InvalidArgument( 178 "dense_shapes[", i, 179 "] has unknown rank or unknown inner dimensions: ", 180 dense_shapes[i].DebugString()); 181 } 182 TensorShape dense_shape; 183 if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) { 184 variable_length.push_back(true); 185 for (int d = 1; d < dense_shapes[i].dims(); ++d) { 186 dense_shape.AddDim(dense_shapes[i].dim_size(d)); 187 } 188 } else { 189 variable_length.push_back(false); 190 dense_shapes[i].AsTensorShape(&dense_shape); 191 } 192 elements_per_stride.push_back(dense_shape.num_elements()); 193 } 194 return FinishInit(); 195 } 196 197 int64 num_sparse; 198 int64 num_dense; 199 std::vector<DataType> sparse_types; 200 std::vector<DataType> dense_types; 201 std::vector<PartialTensorShape> dense_shapes; 202 std::vector<bool> variable_length; 203 std::vector<std::size_t> elements_per_stride; 204 205 private: 206 Status FinishInit(); // for context-independent parts of Init. 207 }; 208 209 // Parses the attributes passed to ParseSingleExample. 210 // REQUIRES: Init must be called after construction. 211 class ParseSingleExampleAttrs { 212 public: 213 template <typename ContextType> Init(ContextType * ctx)214 Status Init(ContextType* ctx) { 215 TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_keys", &sparse_keys)); 216 TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types)); 217 TF_RETURN_IF_ERROR(ctx->GetAttr("dense_keys", &dense_keys)); 218 TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types)); 219 TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes)); 220 221 int num_sparse; 222 TF_RETURN_IF_ERROR(ctx->GetAttr("num_sparse", &num_sparse)); 223 if (num_sparse != sparse_keys.size() || num_sparse != sparse_types.size()) { 224 return errors::InvalidArgument( 225 "num_sparse (", num_sparse, ") must match the size of sparse_keys (", 226 sparse_keys.size(), ") and sparse_types (", sparse_types.size(), ")"); 227 } 228 229 // Temporary check until we start allowing a variable length outer 230 // dimension. 231 for (int i = 0; i < dense_shapes.size(); ++i) { 232 bool shape_ok = true; 233 if (dense_shapes[i].dims() == -1) { 234 shape_ok = false; 235 } else { 236 for (int d = 1; d < dense_shapes[i].dims(); ++d) { 237 if (dense_shapes[i].dim_size(d) == -1) { 238 shape_ok = false; 239 } 240 } 241 } 242 if (!shape_ok) { 243 return errors::InvalidArgument( 244 "dense_shapes[", i, 245 "] has unknown rank or unknown inner dimensions: ", 246 dense_shapes[i].DebugString()); 247 } 248 TensorShape dense_shape; 249 if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) { 250 variable_length.push_back(true); 251 for (int d = 1; d < dense_shapes[i].dims(); ++d) { 252 dense_shape.AddDim(dense_shapes[i].dim_size(d)); 253 } 254 } else { 255 variable_length.push_back(false); 256 dense_shapes[i].AsTensorShape(&dense_shape); 257 } 258 elements_per_stride.push_back(dense_shape.num_elements()); 259 } 260 return FinishInit(); 261 } 262 263 std::vector<string> sparse_keys; 264 std::vector<DataType> sparse_types; 265 std::vector<string> dense_keys; 266 std::vector<DataType> dense_types; 267 std::vector<PartialTensorShape> dense_shapes; 268 std::vector<bool> variable_length; 269 std::vector<std::size_t> elements_per_stride; 270 271 private: 272 Status FinishInit(); // for context-independent parts of Init. 273 }; 274 275 // Parses the attributes passed to ParseSequenceExample. 276 // REQUIRES: Init must be called after construction. 277 class ParseSequenceExampleAttrs { 278 public: 279 template <typename ContextType> Init(ContextType * ctx)280 Status Init(ContextType* ctx) { 281 std::vector<string> feature_list_dense_missing_assumed_empty_tmp; 282 TF_RETURN_IF_ERROR( 283 ctx->GetAttr("feature_list_dense_missing_assumed_empty", 284 &feature_list_dense_missing_assumed_empty_tmp)); 285 for (const string& feature : feature_list_dense_missing_assumed_empty_tmp) { 286 feature_list_dense_missing_assumed_empty.insert(feature); 287 } 288 TF_RETURN_IF_ERROR( 289 ctx->GetAttr("context_sparse_keys", &context_sparse_keys)); 290 TF_RETURN_IF_ERROR(ctx->GetAttr("context_dense_keys", &context_dense_keys)); 291 TF_RETURN_IF_ERROR( 292 ctx->GetAttr("feature_list_sparse_keys", &feature_list_sparse_keys)); 293 TF_RETURN_IF_ERROR( 294 ctx->GetAttr("feature_list_dense_keys", &feature_list_dense_keys)); 295 TF_RETURN_IF_ERROR( 296 ctx->GetAttr("context_sparse_types", &context_sparse_types)); 297 TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_dense", &num_context_dense)); 298 TF_RETURN_IF_ERROR( 299 ctx->GetAttr("Nfeature_list_dense", &num_feature_list_dense)); 300 TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_sparse", &num_context_sparse)); 301 TF_RETURN_IF_ERROR(ctx->GetAttr("Tcontext_dense", &context_dense_types)); 302 TF_RETURN_IF_ERROR( 303 ctx->GetAttr("feature_list_sparse_types", &feature_list_sparse_types)); 304 TF_RETURN_IF_ERROR( 305 ctx->GetAttr("feature_list_dense_types", &feature_list_dense_types)); 306 TF_RETURN_IF_ERROR( 307 ctx->GetAttr("Nfeature_list_sparse", &num_feature_list_sparse)); 308 TF_RETURN_IF_ERROR( 309 ctx->GetAttr("context_dense_shapes", &context_dense_shapes)); 310 TF_RETURN_IF_ERROR( 311 ctx->GetAttr("feature_list_dense_shapes", &feature_list_dense_shapes)); 312 return FinishInit(); 313 } 314 315 std::unordered_set<string> feature_list_dense_missing_assumed_empty; 316 int64 num_context_sparse; 317 int64 num_context_dense; 318 int64 num_feature_list_sparse; 319 int64 num_feature_list_dense; 320 std::vector<string> context_sparse_keys; 321 std::vector<string> context_dense_keys; 322 std::vector<string> feature_list_sparse_keys; 323 std::vector<string> feature_list_dense_keys; 324 std::vector<DataType> context_sparse_types; 325 std::vector<DataType> context_dense_types; 326 std::vector<TensorShape> context_dense_shapes; 327 std::vector<DataType> feature_list_sparse_types; 328 std::vector<DataType> feature_list_dense_types; 329 std::vector<TensorShape> feature_list_dense_shapes; 330 331 private: 332 Status FinishInit(); // for context-independent parts of Init. 333 }; 334 335 // Parses the attributes passed to ParseSingleSequenceExample. 336 // REQUIRES: Init must be called after construction. 337 class ParseSingleSequenceExampleAttrs { 338 public: 339 template <typename ContextType> Init(ContextType * ctx)340 Status Init(ContextType* ctx) { 341 TF_RETURN_IF_ERROR( 342 ctx->GetAttr("context_sparse_types", &context_sparse_types)); 343 TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_dense", &num_context_dense)); 344 TF_RETURN_IF_ERROR( 345 ctx->GetAttr("Nfeature_list_dense", &num_feature_list_dense)); 346 TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_sparse", &num_context_sparse)); 347 TF_RETURN_IF_ERROR(ctx->GetAttr("Tcontext_dense", &context_dense_types)); 348 TF_RETURN_IF_ERROR( 349 ctx->GetAttr("feature_list_sparse_types", &feature_list_sparse_types)); 350 TF_RETURN_IF_ERROR( 351 ctx->GetAttr("feature_list_dense_types", &feature_list_dense_types)); 352 TF_RETURN_IF_ERROR( 353 ctx->GetAttr("Nfeature_list_sparse", &num_feature_list_sparse)); 354 TF_RETURN_IF_ERROR( 355 ctx->GetAttr("context_dense_shapes", &context_dense_shapes)); 356 TF_RETURN_IF_ERROR( 357 ctx->GetAttr("feature_list_dense_shapes", &feature_list_dense_shapes)); 358 return FinishInit(); 359 } 360 361 int64 num_context_sparse; 362 int64 num_context_dense; 363 int64 num_feature_list_sparse; 364 int64 num_feature_list_dense; 365 std::vector<DataType> context_sparse_types; 366 std::vector<DataType> context_dense_types; 367 std::vector<TensorShape> context_dense_shapes; 368 std::vector<DataType> feature_list_sparse_types; 369 std::vector<DataType> feature_list_dense_types; 370 std::vector<TensorShape> feature_list_dense_shapes; 371 372 private: 373 Status FinishInit(); // for context-independent parts of Init. 374 }; 375 376 } // namespace tensorflow 377 378 #endif // TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_ 379