/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_
#define TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_

#include <string>
#include <unordered_map>
#include <vector>

#include "tensorflow/core/example/example.pb.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/partial_tensor_shape.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"

namespace tensorflow {
namespace example {

// FastParseExampleConfig defines how to parse features in Example.
// Each sub-config is responsible for one feature identified with feature_name.
// FastParseExampleConfig can't have two sub-configs with the same feature_name.
// dtype identifies the type of output vector and the kind of Feature expected
// in Example.
42 struct FastParseExampleConfig { 43 struct Dense { 44 string feature_name; 45 DataType dtype; 46 // These 2 fields correspond exactly to dense_shapes and dense_defaults in 47 // ParseExample op. 48 // Documentation is available in: tensorflow/core/ops/parsing_ops.cc 49 PartialTensorShape shape; 50 Tensor default_value; 51 bool variable_length; 52 std::size_t elements_per_stride; 53 }; 54 55 struct Sparse { 56 string feature_name; 57 DataType dtype; 58 }; 59 60 std::vector<Dense> dense; 61 std::vector<Sparse> sparse; 62 63 // If `true`, `Result::feature_stats` will contain one 64 // `PerExampleFeatureStats` for each serialized example in the input. 65 bool collect_feature_stats = false; 66 }; 67 68 // Statistics about the features in each example passed to 69 // `FastParse[Single]Example()`. 70 // 71 // TODO(b/111553342): The gathered statistics currently have two limitations: 72 // * Feature names that appear more than once will be counted multiple times. 73 // * The feature values count only represents the counts for features that were 74 // requested in the `FastParseExampleConfig`. 75 // These could be addressed with additional work at runtime. 76 struct PerExampleFeatureStats { 77 // The number of feature names in an example. 78 size_t features_count = 0; 79 80 // The sum of the number of values in each feature that is parsed. 81 size_t feature_values_count = 0; 82 }; 83 84 // This is exactly the output of TF's ParseExample Op. 85 // Documentation is available in: tensorflow/core/ops/parsing_ops.cc 86 struct Result { 87 std::vector<Tensor> sparse_indices; 88 std::vector<Tensor> sparse_values; 89 std::vector<Tensor> sparse_shapes; 90 std::vector<Tensor> dense_values; 91 92 // This vector will be populated with one element per example if 93 // `FastParseExampleConfig::collect_feature_stats` is set to `true`. 
94 std::vector<PerExampleFeatureStats> feature_stats; 95 }; 96 97 // Parses a batch of serialized Example protos and converts them into result 98 // according to given config. 99 // Given example names have to either be empty or the same size as serialized. 100 // example_names are used only for error messages. 101 Status FastParseExample(const FastParseExampleConfig& config, 102 gtl::ArraySlice<string> serialized, 103 gtl::ArraySlice<string> example_names, 104 thread::ThreadPool* thread_pool, Result* result); 105 106 // TODO(mrry): Move the hash table construction into the config object. 107 typedef FastParseExampleConfig FastParseSingleExampleConfig; 108 109 Status FastParseSingleExample(const FastParseSingleExampleConfig& config, 110 const string& serialized, Result* result); 111 112 // Parses a batch of serialized SequenceExample protos and converts them into 113 // result according to given config. 114 // Given example names have to either be empty or the same size as serialized. 115 // example_names are used only for error messages. 116 Status FastParseSequenceExample( 117 const example::FastParseExampleConfig& context_config, 118 const example::FastParseExampleConfig& feature_list_config, 119 gtl::ArraySlice<string> serialized, gtl::ArraySlice<string> example_names, 120 thread::ThreadPool* thread_pool, example::Result* context_result, 121 example::Result* feature_list_result, 122 std::vector<Tensor>* dense_feature_lengths); 123 124 // This function parses serialized Example and populates given example. 125 // It uses the same specialized parser as FastParseExample which is efficient. 126 // But then constructs Example which is relatively slow. 127 // It is exported here as a convenient API to test parser part separately. 128 bool TestFastParse(const string& serialized, Example* example); 129 130 } // namespace example 131 } // namespace tensorflow 132 133 #endif // TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_ 134