1 // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // ============================================================================= 15 16 #ifndef TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_EXAMPLE_H_ 17 #define TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_EXAMPLE_H_ 18 19 #include <algorithm> 20 #include <unordered_set> 21 #include <vector> 22 #include "tensorflow/contrib/boosted_trees/lib/utils/optional_value.h" 23 #include "tensorflow/core/lib/gtl/inlined_vector.h" 24 25 namespace tensorflow { 26 namespace boosted_trees { 27 namespace utils { 28 // Represents sparse vector that have a value for some feature indices within 29 // the feature column. 30 // Allows subscript access []. 31 template <class T> 32 class SparseMultidimensionalValues { 33 public: Add(const int32 feature_idx,const T value)34 void Add(const int32 feature_idx, const T value) { 35 values_.emplace_back(feature_idx, value); 36 } 37 Clear()38 void Clear() { values_.clear(); } 39 Reserve(const int32 size)40 void Reserve(const int32 size) { values_.reserve(size); } 41 42 OptionalValue<T> operator[](int feature_idx) const { 43 auto value_iter = 44 std::find_if(values_.begin(), values_.end(), 45 [&feature_idx](const std::pair<int32, T>& element) { 46 return element.first == feature_idx; 47 }); 48 49 if (value_iter == values_.end()) { 50 return OptionalValue<T>(); 51 } 52 return OptionalValue<T>(value_iter->second); 53 } 54 55 private: 56 std::vector<std::pair<int32, T>> values_; 57 }; 58 59 // Represents storage for a sparse float feature column. Can store values either 60 // for one dimensional or a multivalent (multidimensional) sparse column. 61 // Allows subscript operator access [feature_id]. 62 template <class T> 63 class SparseFloatFeatureColumn { 64 public: Reserve(const int32 size)65 void Reserve(const int32 size) { 66 if (!single_dimensional_) { 67 multidimensional_values.Reserve(size); 68 } 69 } 70 SetDimension(const int32 dimension)71 void SetDimension(const int32 dimension) { 72 single_dimensional_ = dimension <= 1; 73 } 74 Add(const int32 feature_idx,const float value)75 void Add(const int32 feature_idx, const float value) { 76 if (single_dimensional_) { 77 DCHECK_EQ(0, feature_idx); 78 single_value_ = value; 79 } else { 80 multidimensional_values.Add(feature_idx, value); 81 } 82 initialized_ = true; 83 } 84 Clear()85 void Clear() { 86 single_dimensional_ = false; 87 initialized_ = false; 88 multidimensional_values.Clear(); 89 } 90 91 OptionalValue<T> operator[](int feature_idx) const { 92 if (!initialized_) { 93 return OptionalValue<T>(); 94 } 95 if (single_dimensional_) { 96 return OptionalValue<T>(single_value_); 97 } else { 98 return multidimensional_values[feature_idx]; 99 } 100 } 101 102 private: 103 bool single_dimensional_; 104 bool initialized_; 105 T single_value_; 106 SparseMultidimensionalValues<T> multidimensional_values; 107 }; 108 109 // Holds data for one example and enables lookup by feature column. 110 struct Example { 111 // Default constructor creates an empty example. ExampleExample112 Example() : example_idx(-1) {} 113 114 // Example index. 115 int64 example_idx; 116 117 // Dense and sparse float features indexed by feature column. 118 // TODO(salehay): figure out a design to support multivalent float features. 119 std::vector<float> dense_float_features; 120 121 // Sparse float features columns (can be either single or multivalent 122 // (multidimensional). 123 std::vector<SparseFloatFeatureColumn<float>> sparse_float_features; 124 125 // Sparse integer features indexed by feature column. 126 // Note that all integer features are assumed to be categorical, i.e. will 127 // never be compared by order. Also these features can be multivalent. 128 // By default we allocate a InlinedVector of length 1 though since that is 129 // the most common case. 130 std::vector<gtl::InlinedVector<int64, 1>> sparse_int_features; 131 }; 132 133 } // namespace utils 134 } // namespace boosted_trees 135 } // namespace tensorflow 136 137 #endif // TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_EXAMPLE_H_ 138