/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// A set of lightweight wrappers which simplify access to Feature protos.
//
// TensorFlow Example proto uses associative maps on top of oneof fields.
// SequenceExample proto uses an associative map of FeatureList.
// So accessing feature values is not very convenient.
//
// For example, to read the first value of the integer feature "tag":
//   int id = example.features().feature().at("tag").int64_list().value(0);
//
// to add a value:
//   auto features = example->mutable_features();
//   (*features->mutable_feature())["tag"].mutable_int64_list()->add_value(id);
//
// For float features you have to use float_list, for string - bytes_list.
//
// To do the same with this library:
//   int id = GetFeatureValues<int64>("tag", example).Get(0);
//   GetFeatureValues<int64>("tag", &example)->Add(id);
//
// Modification of bytes features is slightly different:
//   auto tag = GetFeatureValues<string>("tag", &example);
//   *tag->Add() = "lorem ipsum";
//
// To copy multiple values into a feature:
//   AppendFeatureValues({1,2,3}, "tag", &example);
//
// GetFeatureValues gives you access to the underlying data - a RepeatedField
// object (RepeatedPtrField for a bytes list). Refer to the documentation of
// RepeatedField for the full list of supported methods.
//
// NOTE: Due to the nature of oneof proto fields, setting a feature of one type
// automatically clears all values stored as another type with the same feature
// key.
//
// This library also has tools to work with SequenceExample protos.
//
// To get a value from SequenceExample.context:
//   int id = GetFeatureValues<protobuf_int64>("tag", se.context()).Get(0);
// To add a value to the context:
//   GetFeatureValues<protobuf_int64>("tag", se.mutable_context())->Add(42);
//
// To add values to feature_lists:
//   AppendFeatureValues({4.0},
//                       GetFeatureList("images", &se)->Add());
//   AppendFeatureValues({5.0, 3.0},
//                       GetFeatureList("images", &se)->Add());
// This will create a feature list keyed as "images" with two features:
//   feature_lists {
//     feature_list {
//       key: "images"
//       value {
//         feature { float_list { value: [4.0] } }
//         feature { float_list { value: [5.0, 3.0] } }
//       }
//     }
//   }
//
// Functions exposed by this library:
//   HasFeature<[FeatureType]>(key, proto) -> bool
//     Returns true if a feature with the specified key, and optionally
//     FeatureType, belongs to the Features or Example proto.
//   HasFeatureList(key, sequence_example) -> bool
//     Returns true if SequenceExample has a feature_list with the key.
//
//   GetFeatureValues<FeatureType>(key, proto) -> RepeatedField<FeatureType>
//     Returns values for the specified key and the FeatureType.
//     Supported types for the proto: Example, Features.
//   GetFeatureList(key, sequence_example) -> RepeatedPtrField<Feature>
//     Returns Feature protos associated with a key.
//
//   AppendFeatureValues(begin, end, feature)
//   AppendFeatureValues(container or initializer_list, feature)
//     Copies values into a Feature.
89 // AppendFeatureValues(begin, end, key, proto) 90 // AppendFeatureValues(container or initializer_list, key, proto) 91 // Copies values into Features and Example protos with the specified key. 92 // 93 // ClearFeatureValues<FeatureType>(feature) 94 // Clears the feature's repeated field of the given type. 95 // 96 // SetFeatureValues(begin, end, feature) 97 // SetFeatureValues(container or initializer_list, feature) 98 // Clears a Feature, then copies values into it. 99 // SetFeatureValues(begin, end, key, proto) 100 // SetFeatureValues(container or initializer_list, key, proto) 101 // Clears Features or Example protos with the specified key, 102 // then copies values into them. 103 // 104 // Auxiliary functions, it is unlikely you'll need to use them directly: 105 // GetFeatures(proto) -> Features 106 // A convenience function to get Features proto. 107 // Supported types for the proto: Example, Features. 108 // GetFeature(key, proto) -> Feature 109 // Returns a Feature proto for the specified key. 110 // Supported types for the proto: Example, Features. 111 // GetFeatureValues<FeatureType>(feature) -> RepeatedField<FeatureType> 112 // Returns values of the feature for the FeatureType. 113 114 #ifndef TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ 115 #define TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ 116 117 #include <iterator> 118 #include <type_traits> 119 120 #include "absl/base/macros.h" 121 #include "tensorflow/core/example/example.pb.h" 122 #include "tensorflow/core/example/feature.pb.h" 123 #include "tensorflow/core/platform/protobuf.h" 124 #include "tensorflow/core/platform/stringpiece.h" 125 #include "tensorflow/core/platform/types.h" 126 127 namespace tensorflow { 128 129 namespace internal { 130 131 // TODO(gorban): Update all clients in a followup CL. 132 // Returns a reference to a feature corresponding to the name. 133 // Note: it will create a new Feature if it is missing in the example. 
ABSL_DEPRECATED("Use GetFeature instead.")
Feature& ExampleFeature(const std::string& name, Example* example);

// Specializations of RepeatedFieldTrait define a type of RepeatedField
// corresponding to a selected feature type.
// Only the primary template is declared here; the specializations below cover
// protobuf_int64, float, tstring and std::string.
template <typename FeatureType>
struct RepeatedFieldTrait;

// Integer features are stored in a RepeatedField of protobuf_int64.
template <>
struct RepeatedFieldTrait<protobuf_int64> {
  using Type = protobuf::RepeatedField<protobuf_int64>;
};

// Float features are stored in a RepeatedField of float.
template <>
struct RepeatedFieldTrait<float> {
  using Type = protobuf::RepeatedField<float>;
};

// tstring features map to a RepeatedPtrField of std::string (the same type as
// the std::string specialization below).
template <>
struct RepeatedFieldTrait<tstring> {
  using Type = protobuf::RepeatedPtrField<std::string>;
};

// std::string features are stored in a RepeatedPtrField of std::string.
template <>
struct RepeatedFieldTrait<std::string> {
  using Type = protobuf::RepeatedPtrField<std::string>;
};

// Specializations of FeatureTrait define a type of feature corresponding to a
// selected value type.
// The second template parameter exists only so that the partial
// specializations below can be enabled or disabled via std::enable_if.
template <typename ValueType, class Enable = void>
struct FeatureTrait;

// Any type for which std::is_integral is true maps to protobuf_int64.
template <typename ValueType>
struct FeatureTrait<ValueType, typename std::enable_if<
                                   std::is_integral<ValueType>::value>::type> {
  using Type = protobuf_int64;
};

// Any type for which std::is_floating_point is true maps to float.
template <typename ValueType>
struct FeatureTrait<
    ValueType,
    typename std::enable_if<std::is_floating_point<ValueType>::value>::type> {
  using Type = float;
};

// Compile-time predicate for string-like value types. The primary template is
// true when T decays to char* or const char* (so character arrays qualify as
// well); the explicit specializations below add std::string, StringPiece and
// tstring. Those specializations match the exact type only.
template <typename T>
struct is_string
    : public std::integral_constant<
          bool,
          std::is_same<char*, typename std::decay<T>::type>::value ||
              std::is_same<const char*, typename std::decay<T>::type>::value> {
};

template <>
struct is_string<std::string> : std::true_type {};

template <>
struct is_string<::tensorflow::StringPiece> : std::true_type {};

template <>
struct is_string<tstring> : std::true_type {};

// Any type accepted by is_string maps to std::string.
template <typename ValueType>
struct FeatureTrait<
    ValueType, typename std::enable_if<is_string<ValueType>::value>::type> {
  using Type = std::string;
};

}  // namespace internal

// Returns true if sequence_example has a feature_list with the specified key.
bool HasFeatureList(const std::string& key,
                    const SequenceExample& sequence_example);

// Trait that marks the proto types accepted by GetFeatures: true for Example
// and Features (see the specializations below), false for everything else.
template <typename T>
struct TypeHasFeatures : std::false_type {};

template <>
struct TypeHasFeatures<Example> : std::true_type {};

template <>
struct TypeHasFeatures<Features> : std::true_type {};

// A family of template functions to return mutable Features proto from a
// container proto. Supported ProtoTypes: Example, Features.
// The std::enable_if return type removes this overload from consideration for
// any ProtoType whose TypeHasFeatures is false.
template <typename ProtoType>
typename std::enable_if<TypeHasFeatures<ProtoType>::value, Features*>::type
GetFeatures(ProtoType* proto);

// Read-only counterpart of the overload above: takes the proto by const
// reference and returns a const reference to its Features.
template <typename ProtoType>
typename std::enable_if<TypeHasFeatures<ProtoType>::value,
                        const Features&>::type
GetFeatures(const ProtoType& proto);

// Base declaration of a family of template functions to return a read only
// repeated field of feature values.
template <typename FeatureType>
const typename internal::RepeatedFieldTrait<FeatureType>::Type&
GetFeatureValues(const Feature& feature);

// Returns a read only repeated field corresponding to a feature with the
// specified name and FeatureType. Supported ProtoTypes: Example, Features.
// The feature is looked up with the feature map's at(), so the key is expected
// to be present. NOTE(review): what happens for a missing key is decided by
// the protobuf Map implementation - confirm before relying on it.
template <typename FeatureType, typename ProtoType>
const typename internal::RepeatedFieldTrait<FeatureType>::Type&
GetFeatureValues(const std::string& key, const ProtoType& proto) {
  return GetFeatureValues<FeatureType>(GetFeatures(proto).feature().at(key));
}

// Returns a mutable repeated field of a feature's values.
244 template <typename FeatureType> 245 typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues( 246 Feature* feature); 247 248 // Returns a mutable repeated field corresponding to a feature with the 249 // specified name and FeatureType. Supported ProtoTypes: Example, Features. 250 template <typename FeatureType, typename ProtoType> 251 typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues( 252 const std::string& key, ProtoType* proto) { 253 ::tensorflow::Feature& feature = 254 (*GetFeatures(proto)->mutable_feature())[key]; 255 return GetFeatureValues<FeatureType>(&feature); 256 } 257 258 // Returns a read-only Feature proto for the specified key, throws 259 // std::out_of_range if the key is not found. Supported types for the proto: 260 // Example, Features. 261 template <typename ProtoType> 262 const Feature& GetFeature(const std::string& key, const ProtoType& proto) { 263 return GetFeatures(proto).feature().at(key); 264 } 265 266 // Returns a mutable Feature proto for the specified key, creates a new if 267 // necessary. Supported types for the proto: Example, Features. 268 template <typename ProtoType> 269 Feature* GetFeature(const std::string& key, ProtoType* proto) { 270 return &(*GetFeatures(proto)->mutable_feature())[key]; 271 } 272 273 // Returns a repeated field with features corresponding to a feature_list key. 274 const protobuf::RepeatedPtrField<Feature>& GetFeatureList( 275 const std::string& key, const SequenceExample& sequence_example); 276 277 // Returns a mutable repeated field with features corresponding to a 278 // feature_list key. It will create a new FeatureList if necessary. 
279 protobuf::RepeatedPtrField<Feature>* GetFeatureList( 280 const std::string& feature_list_key, SequenceExample* sequence_example); 281 282 template <typename IteratorType> 283 void AppendFeatureValues(IteratorType first, IteratorType last, 284 Feature* feature) { 285 using FeatureType = typename internal::FeatureTrait< 286 typename std::iterator_traits<IteratorType>::value_type>::Type; 287 std::copy(first, last, 288 protobuf::RepeatedFieldBackInserter( 289 GetFeatureValues<FeatureType>(feature))); 290 } 291 292 template <typename ValueType> 293 void AppendFeatureValues(std::initializer_list<ValueType> container, 294 Feature* feature) { 295 AppendFeatureValues(container.begin(), container.end(), feature); 296 } 297 298 template <typename ContainerType> 299 void AppendFeatureValues(const ContainerType& container, Feature* feature) { 300 using IteratorType = typename ContainerType::const_iterator; 301 AppendFeatureValues<IteratorType>(container.begin(), container.end(), 302 feature); 303 } 304 305 // Copies elements from the range, defined by [first, last) into the feature 306 // obtainable from the (proto, key) combination. 307 template <typename IteratorType, typename ProtoType> 308 void AppendFeatureValues(IteratorType first, IteratorType last, 309 const std::string& key, ProtoType* proto) { 310 AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); 311 } 312 313 // Copies all elements from the container into a feature. 314 template <typename ContainerType, typename ProtoType> 315 void AppendFeatureValues(const ContainerType& container, const std::string& key, 316 ProtoType* proto) { 317 using IteratorType = typename ContainerType::const_iterator; 318 AppendFeatureValues<IteratorType>(container.begin(), container.end(), key, 319 proto); 320 } 321 322 // Copies all elements from the initializer list into a Feature contained by 323 // Features or Example proto. 
324 template <typename ValueType, typename ProtoType> 325 void AppendFeatureValues(std::initializer_list<ValueType> container, 326 const std::string& key, ProtoType* proto) { 327 using IteratorType = 328 typename std::initializer_list<ValueType>::const_iterator; 329 AppendFeatureValues<IteratorType>(container.begin(), container.end(), key, 330 proto); 331 } 332 333 // Clears the feature's repeated field (int64, float, or string). 334 template <typename... FeatureType> 335 void ClearFeatureValues(Feature* feature); 336 337 // Clears the feature's repeated field (int64, float, or string). Copies 338 // elements from the range, defined by [first, last) into the feature's repeated 339 // field. 340 template <typename IteratorType> 341 void SetFeatureValues(IteratorType first, IteratorType last, Feature* feature) { 342 using FeatureType = typename internal::FeatureTrait< 343 typename std::iterator_traits<IteratorType>::value_type>::Type; 344 ClearFeatureValues<FeatureType>(feature); 345 AppendFeatureValues(first, last, feature); 346 } 347 348 // Clears the feature's repeated field (int64, float, or string). Copies all 349 // elements from the initializer list into the feature's repeated field. 350 template <typename ValueType> 351 void SetFeatureValues(std::initializer_list<ValueType> container, 352 Feature* feature) { 353 SetFeatureValues(container.begin(), container.end(), feature); 354 } 355 356 // Clears the feature's repeated field (int64, float, or string). Copies all 357 // elements from the container into the feature's repeated field. 358 template <typename ContainerType> 359 void SetFeatureValues(const ContainerType& container, Feature* feature) { 360 using IteratorType = typename ContainerType::const_iterator; 361 SetFeatureValues<IteratorType>(container.begin(), container.end(), feature); 362 } 363 364 // Clears the feature's repeated field (int64, float, or string). 
Copies 365 // elements from the range, defined by [first, last) into the feature's repeated 366 // field. 367 template <typename IteratorType, typename ProtoType> 368 void SetFeatureValues(IteratorType first, IteratorType last, 369 const std::string& key, ProtoType* proto) { 370 SetFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); 371 } 372 373 // Clears the feature's repeated field (int64, float, or string). Copies all 374 // elements from the container into the feature's repeated field. 375 template <typename ContainerType, typename ProtoType> 376 void SetFeatureValues(const ContainerType& container, const std::string& key, 377 ProtoType* proto) { 378 using IteratorType = typename ContainerType::const_iterator; 379 SetFeatureValues<IteratorType>(container.begin(), container.end(), key, 380 proto); 381 } 382 383 // Clears the feature's repeated field (int64, float, or string). Copies all 384 // elements from the initializer list into the feature's repeated field. 385 template <typename ValueType, typename ProtoType> 386 void SetFeatureValues(std::initializer_list<ValueType> container, 387 const std::string& key, ProtoType* proto) { 388 using IteratorType = 389 typename std::initializer_list<ValueType>::const_iterator; 390 SetFeatureValues<IteratorType>(container.begin(), container.end(), key, 391 proto); 392 } 393 394 // Returns true if a feature with the specified key belongs to the Features. 395 // The template parameter pack accepts zero or one template argument - which 396 // is FeatureType. If the FeatureType not specified (zero template arguments) 397 // the function will not check the feature type. Otherwise it will return false 398 // if the feature has a wrong type. 399 template <typename... FeatureType> 400 bool HasFeature(const std::string& key, const Features& features); 401 402 // Returns true if a feature with the specified key belongs to the Example. 
403 // Doesn't check feature type if used without FeatureType, otherwise the 404 // specialized versions return false if the feature has a wrong type. 405 template <typename... FeatureType> 406 bool HasFeature(const std::string& key, const Example& example) { 407 return HasFeature<FeatureType...>(key, GetFeatures(example)); 408 } 409 410 // TODO(gorban): update all clients in a followup CL. 411 template <typename... FeatureType> 412 ABSL_DEPRECATED("Use HasFeature instead.") 413 bool ExampleHasFeature(const std::string& key, const Example& example) { 414 return HasFeature<FeatureType...>(key, example); 415 } 416 417 } // namespace tensorflow 418 #endif // TENSORFLOW_CORE_EXAMPLE_FEATURE_UTIL_H_ 419