/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_TREE_HELPER_H_
#define TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_TREE_HELPER_H_
#include <cmath>
#include <vector>

#include "third_party/eigen3/Eigen/Core"
#include "third_party/eigen3/Eigen/QR"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

namespace boosted_trees {
// TODO(nponomareva, youngheek): consider using vector.
struct SplitCandidate {
  SplitCandidate() {}

  // Index into the node_ids tensor for the feature with index feature_idx.
  int64 candidate_idx = 0;

  int64 feature_id = 0;
  float gain = 0.0;
  int32 threshold = 0;
  int32 dimension_id = 0;
  std::vector<float> left_node_contribs;
  std::vector<float> right_node_contribs;
  // The split type, i.e., whether missing values go to the left or right.
  string split_type;
};
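
// Illustrative usage; all field values below are hypothetical, and the
// split_type label is an assumed example (the actual labels are the ones
// consumed by stats_ops.cc):
//   SplitCandidate candidate;
//   candidate.feature_id = 3;
//   candidate.dimension_id = 0;
//   candidate.threshold = 12;
//   candidate.gain = 0.27f;
//   candidate.split_type = "inequality_default_left";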
} // namespace boosted_trees

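// Compares two gains with an absolute tolerance, since floating-point gain
// computations are not exact.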
static bool GainsAreEqual(const float g1, const float g2) {
  const float kTolerance = 1e-15;
  return std::abs(g1 - g2) < kTolerance;
}

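// Returns true iff g1 exceeds g2 by at least kTolerance, so that
// sub-tolerance differences do not count as an improvement.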
static bool GainIsLarger(const float g1, const float g2) {
  const float kTolerance = 1e-15;
  return g1 - g2 >= kTolerance;
}

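// Solves the Newton step (hessian_and_reg) * w = -g for the leaf weight using
// a column-pivoted Householder QR decomposition, then evaluates the gain
// -g^T * w for that weight (the same no-1/2 convention as the scalar case
// below).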
static void MultiDimLogitSolveForWeightAndGain(
    const Eigen::MatrixXf& hessian_and_reg, const Eigen::VectorXf& g,
    Eigen::VectorXf* weight, float* gain) {
  *weight = -hessian_and_reg.colPivHouseholderQr().solve(g);
  *gain = -g.transpose() * (*weight);
}

// Used in stats_ops.cc to determine weights/gains for each feature split.
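// Dispatches on the size of h: a single logit (scalar Hessian), a full
// logits_dim x logits_dim Hessian flattened into h, or a diagonal
// approximation with one entry per logit.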
static void CalculateWeightsAndGains(const Eigen::VectorXf& g,
                                     const Eigen::VectorXf& h, const float l1,
                                     const float l2, Eigen::VectorXf* weight,
                                     float* gain) {
  const float kEps = 1e-15;
  const int32 logits_dim = g.size();
  if (logits_dim == 1) {
    // The formula for the weight is -(g+l1*sgn(w))/(h+l2); the gain is
    // (g+l1*sgn(w))^2/(h+l2).
    // This is because for each leaf we minimize
    // 1/2*(h+l2)*w^2 + g*w + l1*|w|.
    float g_with_l1 = g[0];
    // Apply L1 regularization.
    // 1) Assume w>0 => w=-(g+l1)/(h+l2) => g+l1 < 0 => g < -l1.
    // 2) Assume w<0 => w=-(g-l1)/(h+l2) => g-l1 > 0 => g > l1.
    // For g in [-l1, l1] neither assumption holds, so set w to 0.
    if (l1 > 0) {
      if (g[0] > l1) {
        g_with_l1 -= l1;
      } else if (g[0] < -l1) {
        g_with_l1 += l1;
      } else {
        weight->coeffRef(0) = 0.0;
        *gain = 0.0;
        return;
      }
    }
    // Apply L2 regularization.
    if (h[0] + l2 <= kEps) {
      // Avoid division by 0 or infinitesimal.
      weight->coeffRef(0) = 0;
      *gain = 0;
    } else {
      weight->coeffRef(0) = -g_with_l1 / (h[0] + l2);
      *gain = -g_with_l1 * weight->coeffRef(0);
    }
  } else if (h.size() == logits_dim * logits_dim) { /* Full Hessian. */
    Eigen::MatrixXf identity;
    identity.setIdentity(logits_dim, logits_dim);
    // TODO(crawles): figure out L1 regularization for matrix form.
    Eigen::MatrixXf hessian_and_reg =
        h.reshaped(logits_dim, logits_dim) + l2 * identity;
    MultiDimLogitSolveForWeightAndGain(hessian_and_reg, g, weight, gain);
  } else if (h.size() == logits_dim) { /* Diagonal Hessian approximation. */
    // TODO(crawles): figure out L1 regularization for matrix form.
    Eigen::ArrayXf hessian_and_reg = h.array() + l2;
    // Check whether any diagonal element is zero, which would make the
    // diagonal Hessian singular.
    bool invertible = true;
    for (int i = 0; i < hessian_and_reg.size(); ++i) {
      if (hessian_and_reg[i] == 0.0) {
        invertible = false;
        break;
      }
    }
    if (invertible) {
      // Operations on Eigen arrays are element-wise. The formulas are the
      // same as for the full Hessian, but simplify because the Hessian is
      // diagonal: its inverse is the element-wise reciprocal.
      Eigen::ArrayXf ones = Eigen::ArrayXf::Ones(logits_dim);
      Eigen::ArrayXf temp = ones / hessian_and_reg;
      *weight = -temp * g.array();
      *gain = (-g.array() * (*weight).array()).sum();
    } else {
      // The Hessian is not invertible, so fall back to the full-Hessian QR
      // path to obtain an approximate solution.
      MultiDimLogitSolveForWeightAndGain(hessian_and_reg.matrix().asDiagonal(),
                                         g, weight, gain);
    }
  }
}
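
// Worked scalar example (hypothetical numbers): with g = [2.0], h = [4.0],
// l1 = 0 and l2 = 1, CalculateWeightsAndGains yields
// weight = -2.0 / (4.0 + 1.0) = -0.4 and gain = -2.0 * -0.4 = 0.8.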

} // namespace tensorflow

#endif // TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_TREE_HELPER_H_