• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_
17 #define TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_
18 
19 #include <string>
20 #include <unordered_map>
21 #include <vector>
22 
23 #include "tensorflow/core/example/example.pb.h"
24 #include "tensorflow/core/framework/allocator.h"
25 #include "tensorflow/core/framework/graph.pb.h"
26 #include "tensorflow/core/framework/op_kernel.h"
27 #include "tensorflow/core/framework/partial_tensor_shape.h"
28 #include "tensorflow/core/framework/tensor.h"
29 #include "tensorflow/core/framework/types.h"
30 #include "tensorflow/core/lib/gtl/array_slice.h"
31 #include "tensorflow/core/platform/types.h"
32 #include "tensorflow/core/util/sparse/sparse_tensor.h"
33 
34 namespace tensorflow {
35 namespace example {
36 
37 // FastParseExampleConfig defines how to parse the features in an Example.
38 // Each sub-config is responsible for one feature identified by feature_name.
39 // FastParseExampleConfig can't have two sub-configs with the same feature_name.
40 // dtype identifies the type of output vector and the kind of Feature expected
41 // in Example.
42 struct FastParseExampleConfig {
  // Sub-config for a single dense feature.
43   struct Dense {
44     string feature_name;
45     DataType dtype;
46     // The next 2 fields (shape, default_value) correspond exactly to
47     // dense_shapes and dense_defaults in the ParseExample op.
48     // Documentation is available in: tensorflow/core/ops/parsing_ops.cc
49     PartialTensorShape shape;
50     Tensor default_value;
    // NOTE(review): variable_length and elements_per_stride are not
    // documented in this header and, unlike collect_feature_stats below,
    // have no in-class initializer. Confirm their meaning and expected
    // values against the parser implementation.
51     bool variable_length;
52     std::size_t elements_per_stride;
53   };
54 
  // Sub-config for a single sparse feature.
55   struct Sparse {
56     string feature_name;
57     DataType dtype;
58   };
59 
  // The dense and sparse sub-configs that make up this config.
60   std::vector<Dense> dense;
61   std::vector<Sparse> sparse;
62 
63   // If `true`, `Result::feature_stats` will contain one
64   // `PerExampleFeatureStats` for each serialized example in the input.
  // Defaults to `false`.
65   bool collect_feature_stats = false;
66 };
67 
68 // Statistics about the features in each example passed to
69 // `FastParse[Single]Example()`.
70 //
71 // TODO(b/111553342): The gathered statistics currently have two limitations:
72 // * Feature names that appear more than once will be counted multiple times.
73 // * The feature values count only represents the counts for features that were
74 //   requested in the `FastParseExampleConfig`.
75 // These could be addressed with additional work at runtime.
//
// One instance is reported per serialized example, and only when
// `FastParseExampleConfig::collect_feature_stats` is `true`. Both counters
// start at zero.
76 struct PerExampleFeatureStats {
77   // The number of feature names in an example.
78   size_t features_count = 0;
79 
80   // The sum of the number of values in each feature that is parsed.
81   size_t feature_values_count = 0;
82 };
83 
84 // This is exactly the output of TF's ParseExample Op.
85 // Documentation is available in: tensorflow/core/ops/parsing_ops.cc
86 struct Result {
  // NOTE(review): presumably the three sparse_* vectors hold one Tensor per
  // sparse sub-config and dense_values one Tensor per dense sub-config, in
  // config order -- confirm against the parser implementation.
87   std::vector<Tensor> sparse_indices;
88   std::vector<Tensor> sparse_values;
89   std::vector<Tensor> sparse_shapes;
90   std::vector<Tensor> dense_values;
91 
92   // This vector will be populated with one element per example if
93   // `FastParseExampleConfig::collect_feature_stats` is set to `true`.
94   std::vector<PerExampleFeatureStats> feature_stats;
95 };
96 
97 // Parses a batch of serialized Example protos and converts them into `result`
98 // according to the given `config`.
99 // `example_names` has to either be empty or the same size as `serialized`.
100 // `example_names` are used only for error messages.
// Returns a Status describing the outcome of the parse.
// NOTE(review): the role of `thread_pool` (and whether it may be null) is not
// documented in this header -- confirm against the implementation.
101 Status FastParseExample(const FastParseExampleConfig& config,
102                         gtl::ArraySlice<string> serialized,
103                         gtl::ArraySlice<string> example_names,
104                         thread::ThreadPool* thread_pool, Result* result);
105 
106 // TODO(mrry): Move the hash table construction into the config object.
// Until then, the single-example config is a plain alias of
// FastParseExampleConfig; the two names denote the same type.
107 typedef FastParseExampleConfig FastParseSingleExampleConfig;
108 
// NOTE(review): this function is undocumented in this header. Judging by its
// signature, it is the single-Example counterpart of FastParseExample: it
// takes one serialized string instead of a batch, writes its output through
// `result`, and takes no example names or thread pool. Confirm against the
// implementation.
109 Status FastParseSingleExample(const FastParseSingleExampleConfig& config,
110                               const string& serialized, Result* result);
111 
112 // Parses a batch of serialized SequenceExample protos and converts them into
113 // results according to the given configs.
114 // `example_names` has to either be empty or the same size as `serialized`.
115 // `example_names` are used only for error messages.
// NOTE(review): presumably `context_config` governs `context_result` and
// `feature_list_config` governs `feature_list_result`; the contents of
// `dense_feature_lengths` and the role of `thread_pool` are not documented in
// this header -- confirm against the implementation.
116 Status FastParseSequenceExample(
117     const example::FastParseExampleConfig& context_config,
118     const example::FastParseExampleConfig& feature_list_config,
119     gtl::ArraySlice<string> serialized, gtl::ArraySlice<string> example_names,
120     thread::ThreadPool* thread_pool, example::Result* context_result,
121     example::Result* feature_list_result,
122     std::vector<Tensor>* dense_feature_lengths);
123 
124 // This function parses a serialized Example and populates the given example.
125 // It uses the same specialized parser as FastParseExample, which is efficient,
126 // but then constructs an Example, which is relatively slow.
127 // It is exported here as a convenient API to test the parser part separately.
// NOTE(review): the meaning of the bool return value is not documented here;
// presumably `true` on a successful parse -- confirm.
128 bool TestFastParse(const string& serialized, Example* example);
129 
130 }  // namespace example
131 }  // namespace tensorflow
132 
133 #endif  // TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_
134