• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_
17 #define TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_
18 
#include <string>
#include <unordered_set>
#include <vector>

#include "tensorflow/core/example/example.pb.h"
#include "tensorflow/core/example/feature.pb.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/partial_tensor_shape.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"
32 
33 // This is a set of helper methods that will make it possible to share
34 // tensorflow::Example proto Tensor conversion code inside the ExampleParserOp
35 // OpKernel as well as in external code.
36 namespace tensorflow {
37 
38 // "Dense" feature configuration.
39 struct FixedLenFeature {
40   string key;
41   DataType dtype;
42   TensorShape shape;
43   Tensor default_value;
44   string values_output_tensor_name;
45 };
46 
47 // "Sparse" feature configuration.
48 struct VarLenFeature {
49   string key;
50   DataType dtype;
51   string values_output_tensor_name;
52   string indices_output_tensor_name;
53   string shapes_output_tensor_name;
54 };
55 
56 // Given a single tensorflow::Example, with an optional example name
57 // at a particular index within a batch, and dense and sparse feature
58 // configurations from fixed_len_features, var_len_features, this method
59 // updates the dense value tensor and the sparse values temporary vector
60 // of tensors. The indexing of the output vectors correspond 1:1 to the
61 // indexing of the feature configuration vectors.
62 //
63 // The fixed_len_features and var_len_features maps are assume to be
64 // have disjoint key fields from the Feature map in the tensorflow.Example
65 // proto.
66 //
67 // For each sparse feature, the sparse values temporary vector holds a
68 // tensor for each Example. Each tensor is either empty or filled, depending
69 // on if the sparse feature value is set for the Example. This
70 // temporary structure is needed because we need to know the total number
71 // of filled elements in the batch to get the proper final sparse tensor
72 // shapes allocated.  After the entire batch is processed,
73 // GetSparseTensorShape can be used to calculate the final shapes and
74 // CopyIntoSparseTensor can be used to copy from the temporary vector
75 // into the final allocated tensors.
76 Status SingleExampleProtoToTensors(
77     const Example& example, const string& name, const int batch_index,
78     const std::vector<FixedLenFeature>& fixed_len_features,
79     const std::vector<VarLenFeature>& var_len_features,
80     std::vector<Tensor*>* dense_values,
81     std::vector<std::vector<Tensor>>* sparse_values_temporary_vector);
82 
83 // The shape of the indices and values tensors associated with a SparseTensor
84 // are dependent on the contents of the batch.
85 struct VarLenFeatureBatchShapes {
86   TensorShape indices_shape;
87   TensorShape values_shape;
88   int max_num_features;
89 };
90 
91 // Get the shape of the sparse values and indices tensors for the batch,
92 // given how many of the tensors in the temporary sparse values vector
93 // are actually filled.
94 Status GetSparseTensorShapes(const VarLenFeature& var_len_feature,
95                              const std::vector<Tensor>& sparse_values_tmp,
96                              const int batch_size,
97                              VarLenFeatureBatchShapes* output_shapes);
98 
99 // A method to convert a batch of tensorflow::Example protos into output
100 // tensors. This method is useful if there already is a batch of deserialized
101 // Example protos in memory (such as a serving use-case) and we do not wish
102 // to incur an extraneous serialize/deserialize.  It is intended
103 // as an outside of OpKernel compatible replacement for the functionality of
104 // ExampleParserOp. In a serving setting, this method could be used to produce
105 // a feed_dict of Tensors that could bypass the ExampleParserOp.
106 //
107 // Note that unlike SingleExampleProtoToTensors, output tensors are
108 // allocated using a provided Allocator within this method.
109 Status BatchExampleProtoToTensors(
110     const std::vector<const Example*>& examples,
111     const std::vector<string>& names,
112     const std::vector<FixedLenFeature>& fixed_len_features,
113     const std::vector<VarLenFeature>& var_len_features, Allocator* allocator,
114     std::vector<Tensor>* output_dense_values_tensor,
115     std::vector<Tensor>* output_sparse_indices_tensor,
116     std::vector<Tensor>* output_sparse_values_tensor,
117     std::vector<Tensor>* output_sparse_shapes_tensor);
118 
119 // Check that the given dtype is one that is compatible with
120 // tensorflow::Example protocol buffer feature values.
121 Status CheckValidType(const DataType& dtype);
122 
123 // Check that the provided Feature proto message's oneof value
124 // matches that of the provided dtype.
125 Status CheckTypesMatch(const Feature& feature, const DataType& dtype,
126                        bool* match);
127 
128 // For a single Example, copy a dense feature value into an output
129 // dense value tensor Out at the provided out_index offset.
130 Status FeatureDenseCopy(const std::size_t out_index, const string& name,
131                         const string& key, const DataType& dtype,
132                         const TensorShape& shape, const Feature& feature,
133                         Tensor* out);
134 
135 // Copy the value a provided Tensor into an output dense_value tensor Out
136 // at the provided out_index offset.
137 void RowDenseCopy(const std::size_t& out_index, const DataType& dtype,
138                   const Tensor& in, Tensor* out);
139 
140 // For a single Example, and given sparse feature return a temporary output
141 // Tensor suitable for being collected in the temporary sparse value vector.
142 Tensor FeatureSparseCopy(const std::size_t batch, const string& key,
143                          const DataType& dtype, const Feature& feature);
144 
145 // Copy a temporary Tensor into the final sparse indices and values
146 // tensor at a given batch index and element offset. This method
147 // assumes that the indices/values Tensors have been properly allocated
148 // for the batch.
149 int64 CopyIntoSparseTensor(const Tensor& in, const int batch,
150                            const int64 offset, Tensor* indices, Tensor* values);
151 
152 // Parses the attributes passed to ParseExample.
153 // REQUIRES: Init must be called after construction.
154 class ParseExampleAttrs {
155  public:
156   template <typename ContextType>
Init(ContextType * ctx)157   Status Init(ContextType* ctx) {
158     TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types));
159     TF_RETURN_IF_ERROR(ctx->GetAttr("Ndense", &num_dense));
160     TF_RETURN_IF_ERROR(ctx->GetAttr("Nsparse", &num_sparse));
161     TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types));
162     TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes));
163     // Temporary check until we start allowing a variable length outer
164     // dimension.
165     for (int i = 0; i < dense_shapes.size(); ++i) {
166       bool shape_ok = true;
167       if (dense_shapes[i].dims() == -1) {
168         shape_ok = false;
169       } else {
170         for (int d = 1; d < dense_shapes[i].dims(); ++d) {
171           if (dense_shapes[i].dim_size(d) == -1) {
172             shape_ok = false;
173           }
174         }
175       }
176       if (!shape_ok) {
177         return errors::InvalidArgument(
178             "dense_shapes[", i,
179             "] has unknown rank or unknown inner dimensions: ",
180             dense_shapes[i].DebugString());
181       }
182       TensorShape dense_shape;
183       if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) {
184         variable_length.push_back(true);
185         for (int d = 1; d < dense_shapes[i].dims(); ++d) {
186           dense_shape.AddDim(dense_shapes[i].dim_size(d));
187         }
188       } else {
189         variable_length.push_back(false);
190         dense_shapes[i].AsTensorShape(&dense_shape);
191       }
192       elements_per_stride.push_back(dense_shape.num_elements());
193     }
194     return FinishInit();
195   }
196 
197   int64 num_sparse;
198   int64 num_dense;
199   std::vector<DataType> sparse_types;
200   std::vector<DataType> dense_types;
201   std::vector<PartialTensorShape> dense_shapes;
202   std::vector<bool> variable_length;
203   std::vector<std::size_t> elements_per_stride;
204 
205  private:
206   Status FinishInit();  // for context-independent parts of Init.
207 };
208 
209 // Parses the attributes passed to ParseSingleExample.
210 // REQUIRES: Init must be called after construction.
211 class ParseSingleExampleAttrs {
212  public:
213   template <typename ContextType>
Init(ContextType * ctx)214   Status Init(ContextType* ctx) {
215     TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_keys", &sparse_keys));
216     TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types));
217     TF_RETURN_IF_ERROR(ctx->GetAttr("dense_keys", &dense_keys));
218     TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types));
219     TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes));
220 
221     int num_sparse;
222     TF_RETURN_IF_ERROR(ctx->GetAttr("num_sparse", &num_sparse));
223     if (num_sparse != sparse_keys.size() || num_sparse != sparse_types.size()) {
224       return errors::InvalidArgument(
225           "num_sparse (", num_sparse, ") must match the size of sparse_keys (",
226           sparse_keys.size(), ") and sparse_types (", sparse_types.size(), ")");
227     }
228 
229     // Temporary check until we start allowing a variable length outer
230     // dimension.
231     for (int i = 0; i < dense_shapes.size(); ++i) {
232       bool shape_ok = true;
233       if (dense_shapes[i].dims() == -1) {
234         shape_ok = false;
235       } else {
236         for (int d = 1; d < dense_shapes[i].dims(); ++d) {
237           if (dense_shapes[i].dim_size(d) == -1) {
238             shape_ok = false;
239           }
240         }
241       }
242       if (!shape_ok) {
243         return errors::InvalidArgument(
244             "dense_shapes[", i,
245             "] has unknown rank or unknown inner dimensions: ",
246             dense_shapes[i].DebugString());
247       }
248       TensorShape dense_shape;
249       if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) {
250         variable_length.push_back(true);
251         for (int d = 1; d < dense_shapes[i].dims(); ++d) {
252           dense_shape.AddDim(dense_shapes[i].dim_size(d));
253         }
254       } else {
255         variable_length.push_back(false);
256         dense_shapes[i].AsTensorShape(&dense_shape);
257       }
258       elements_per_stride.push_back(dense_shape.num_elements());
259     }
260     return FinishInit();
261   }
262 
263   std::vector<string> sparse_keys;
264   std::vector<DataType> sparse_types;
265   std::vector<string> dense_keys;
266   std::vector<DataType> dense_types;
267   std::vector<PartialTensorShape> dense_shapes;
268   std::vector<bool> variable_length;
269   std::vector<std::size_t> elements_per_stride;
270 
271  private:
272   Status FinishInit();  // for context-independent parts of Init.
273 };
274 
275 // Parses the attributes passed to ParseSequenceExample.
276 // REQUIRES: Init must be called after construction.
277 class ParseSequenceExampleAttrs {
278  public:
279   template <typename ContextType>
Init(ContextType * ctx)280   Status Init(ContextType* ctx) {
281     std::vector<string> feature_list_dense_missing_assumed_empty_tmp;
282     TF_RETURN_IF_ERROR(
283         ctx->GetAttr("feature_list_dense_missing_assumed_empty",
284                      &feature_list_dense_missing_assumed_empty_tmp));
285     for (const string& feature : feature_list_dense_missing_assumed_empty_tmp) {
286       feature_list_dense_missing_assumed_empty.insert(feature);
287     }
288     TF_RETURN_IF_ERROR(
289         ctx->GetAttr("context_sparse_keys", &context_sparse_keys));
290     TF_RETURN_IF_ERROR(ctx->GetAttr("context_dense_keys", &context_dense_keys));
291     TF_RETURN_IF_ERROR(
292         ctx->GetAttr("feature_list_sparse_keys", &feature_list_sparse_keys));
293     TF_RETURN_IF_ERROR(
294         ctx->GetAttr("feature_list_dense_keys", &feature_list_dense_keys));
295     TF_RETURN_IF_ERROR(
296         ctx->GetAttr("context_sparse_types", &context_sparse_types));
297     TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_dense", &num_context_dense));
298     TF_RETURN_IF_ERROR(
299         ctx->GetAttr("Nfeature_list_dense", &num_feature_list_dense));
300     TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_sparse", &num_context_sparse));
301     TF_RETURN_IF_ERROR(ctx->GetAttr("Tcontext_dense", &context_dense_types));
302     TF_RETURN_IF_ERROR(
303         ctx->GetAttr("feature_list_sparse_types", &feature_list_sparse_types));
304     TF_RETURN_IF_ERROR(
305         ctx->GetAttr("feature_list_dense_types", &feature_list_dense_types));
306     TF_RETURN_IF_ERROR(
307         ctx->GetAttr("Nfeature_list_sparse", &num_feature_list_sparse));
308     TF_RETURN_IF_ERROR(
309         ctx->GetAttr("context_dense_shapes", &context_dense_shapes));
310     TF_RETURN_IF_ERROR(
311         ctx->GetAttr("feature_list_dense_shapes", &feature_list_dense_shapes));
312     return FinishInit();
313   }
314 
315   std::unordered_set<string> feature_list_dense_missing_assumed_empty;
316   int64 num_context_sparse;
317   int64 num_context_dense;
318   int64 num_feature_list_sparse;
319   int64 num_feature_list_dense;
320   std::vector<string> context_sparse_keys;
321   std::vector<string> context_dense_keys;
322   std::vector<string> feature_list_sparse_keys;
323   std::vector<string> feature_list_dense_keys;
324   std::vector<DataType> context_sparse_types;
325   std::vector<DataType> context_dense_types;
326   std::vector<TensorShape> context_dense_shapes;
327   std::vector<DataType> feature_list_sparse_types;
328   std::vector<DataType> feature_list_dense_types;
329   std::vector<TensorShape> feature_list_dense_shapes;
330 
331  private:
332   Status FinishInit();  // for context-independent parts of Init.
333 };
334 
335 // Parses the attributes passed to ParseSingleSequenceExample.
336 // REQUIRES: Init must be called after construction.
337 class ParseSingleSequenceExampleAttrs {
338  public:
339   template <typename ContextType>
Init(ContextType * ctx)340   Status Init(ContextType* ctx) {
341     TF_RETURN_IF_ERROR(
342         ctx->GetAttr("context_sparse_types", &context_sparse_types));
343     TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_dense", &num_context_dense));
344     TF_RETURN_IF_ERROR(
345         ctx->GetAttr("Nfeature_list_dense", &num_feature_list_dense));
346     TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_sparse", &num_context_sparse));
347     TF_RETURN_IF_ERROR(ctx->GetAttr("Tcontext_dense", &context_dense_types));
348     TF_RETURN_IF_ERROR(
349         ctx->GetAttr("feature_list_sparse_types", &feature_list_sparse_types));
350     TF_RETURN_IF_ERROR(
351         ctx->GetAttr("feature_list_dense_types", &feature_list_dense_types));
352     TF_RETURN_IF_ERROR(
353         ctx->GetAttr("Nfeature_list_sparse", &num_feature_list_sparse));
354     TF_RETURN_IF_ERROR(
355         ctx->GetAttr("context_dense_shapes", &context_dense_shapes));
356     TF_RETURN_IF_ERROR(
357         ctx->GetAttr("feature_list_dense_shapes", &feature_list_dense_shapes));
358     return FinishInit();
359   }
360 
361   int64 num_context_sparse;
362   int64 num_context_dense;
363   int64 num_feature_list_sparse;
364   int64 num_feature_list_dense;
365   std::vector<DataType> context_sparse_types;
366   std::vector<DataType> context_dense_types;
367   std::vector<TensorShape> context_dense_shapes;
368   std::vector<DataType> feature_list_sparse_types;
369   std::vector<DataType> feature_list_dense_types;
370   std::vector<TensorShape> feature_list_dense_shapes;
371 
372  private:
373   Status FinishInit();  // for context-independent parts of Init.
374 };
375 
376 }  // namespace tensorflow
377 
378 #endif  // TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_
379