• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // =============================================================================
15 
16 #ifndef TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_EXAMPLE_H_
17 #define TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_EXAMPLE_H_
18 
19 #include <algorithm>
20 #include <unordered_set>
21 #include <vector>
22 #include "tensorflow/contrib/boosted_trees/lib/utils/optional_value.h"
23 #include "tensorflow/core/lib/gtl/inlined_vector.h"
24 
25 namespace tensorflow {
26 namespace boosted_trees {
27 namespace utils {
28 // Represents sparse vector that have a value for some feature indices within
29 // the feature column.
30 // Allows subscript access [].
31 template <class T>
32 class SparseMultidimensionalValues {
33  public:
Add(const int32 feature_idx,const T value)34   void Add(const int32 feature_idx, const T value) {
35     values_.emplace_back(feature_idx, value);
36   }
37 
Clear()38   void Clear() { values_.clear(); }
39 
Reserve(const int32 size)40   void Reserve(const int32 size) { values_.reserve(size); }
41 
42   OptionalValue<T> operator[](int feature_idx) const {
43     auto value_iter =
44         std::find_if(values_.begin(), values_.end(),
45                      [&feature_idx](const std::pair<int32, T>& element) {
46                        return element.first == feature_idx;
47                      });
48 
49     if (value_iter == values_.end()) {
50       return OptionalValue<T>();
51     }
52     return OptionalValue<T>(value_iter->second);
53   }
54 
55  private:
56   std::vector<std::pair<int32, T>> values_;
57 };
58 
59 // Represents storage for a sparse float feature column. Can store values either
60 // for one dimensional or a multivalent (multidimensional) sparse column.
61 // Allows subscript operator access [feature_id].
62 template <class T>
63 class SparseFloatFeatureColumn {
64  public:
Reserve(const int32 size)65   void Reserve(const int32 size) {
66     if (!single_dimensional_) {
67       multidimensional_values.Reserve(size);
68     }
69   }
70 
SetDimension(const int32 dimension)71   void SetDimension(const int32 dimension) {
72     single_dimensional_ = dimension <= 1;
73   }
74 
Add(const int32 feature_idx,const float value)75   void Add(const int32 feature_idx, const float value) {
76     if (single_dimensional_) {
77       DCHECK_EQ(0, feature_idx);
78       single_value_ = value;
79     } else {
80       multidimensional_values.Add(feature_idx, value);
81     }
82     initialized_ = true;
83   }
84 
Clear()85   void Clear() {
86     single_dimensional_ = false;
87     initialized_ = false;
88     multidimensional_values.Clear();
89   }
90 
91   OptionalValue<T> operator[](int feature_idx) const {
92     if (!initialized_) {
93       return OptionalValue<T>();
94     }
95     if (single_dimensional_) {
96       return OptionalValue<T>(single_value_);
97     } else {
98       return multidimensional_values[feature_idx];
99     }
100   }
101 
102  private:
103   bool single_dimensional_;
104   bool initialized_;
105   T single_value_;
106   SparseMultidimensionalValues<T> multidimensional_values;
107 };
108 
109 // Holds data for one example and enables lookup by feature column.
110 struct Example {
111   // Default constructor creates an empty example.
ExampleExample112   Example() : example_idx(-1) {}
113 
114   // Example index.
115   int64 example_idx;
116 
117   // Dense and sparse float features indexed by feature column.
118   // TODO(salehay): figure out a design to support multivalent float features.
119   std::vector<float> dense_float_features;
120 
121   // Sparse float features columns (can be either single or multivalent
122   // (multidimensional).
123   std::vector<SparseFloatFeatureColumn<float>> sparse_float_features;
124 
125   // Sparse integer features indexed by feature column.
126   // Note that all integer features are assumed to be categorical, i.e. will
127   // never be compared by order. Also these features can be multivalent.
128   // By default we allocate a InlinedVector of length 1 though since that is
129   // the most common case.
130   std::vector<gtl::InlinedVector<int64, 1>> sparse_int_features;
131 };
132 
133 }  // namespace utils
134 }  // namespace boosted_trees
135 }  // namespace tensorflow
136 
137 #endif  // TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_UTILS_EXAMPLE_H_
138