1syntax = "proto3"; 2option cc_enable_arenas = true; 3 4package tensorflow.boosted_trees.trees; 5 6// TreeNode describes a node in a tree. 7message TreeNode { 8 oneof node { 9 Leaf leaf = 1; 10 DenseFloatBinarySplit dense_float_binary_split = 2; 11 SparseFloatBinarySplitDefaultLeft sparse_float_binary_split_default_left = 12 3; 13 SparseFloatBinarySplitDefaultRight sparse_float_binary_split_default_right = 14 4; 15 CategoricalIdBinarySplit categorical_id_binary_split = 5; 16 CategoricalIdSetMembershipBinarySplit 17 categorical_id_set_membership_binary_split = 6; 18 ObliviousDenseFloatBinarySplit oblivious_dense_float_binary_split = 7; 19 ObliviousCategoricalIdBinarySplit oblivious_categorical_id_binary_split = 8; 20 } 21 TreeNodeMetadata node_metadata = 777; 22} 23 24// TreeNodeMetadata encodes metadata associated with each node in a tree. 25message TreeNodeMetadata { 26 // The gain associated with this node. 27 float gain = 1; 28 29 // The original leaf node before this node was split. 30 Leaf original_leaf = 2; 31 32 // The original layer of leaves before that layer was converted to a split. 33 repeated Leaf original_oblivious_leaves = 3; 34} 35 36// Leaves can either hold dense or sparse information. 37message Leaf { 38 oneof leaf { 39 // See third_party/tensorflow/contrib/decision_trees/ 40 // proto/generic_tree_model.proto 41 // for a description of how vector and sparse_vector might be used. 42 Vector vector = 1; 43 SparseVector sparse_vector = 2; 44 } 45} 46 47message Vector { 48 repeated float value = 1; 49} 50 51message SparseVector { 52 repeated int32 index = 1; 53 repeated float value = 2; 54} 55 56// Split rule for dense float features. 57message DenseFloatBinarySplit { 58 // Float feature column and split threshold describing 59 // the rule feature <= threshold. 60 int32 feature_column = 1; 61 // If feature column is multivalent, this holds the index of the dimension 62 // for the split. Defaults to 0. 63 int32 dimension_id = 5; 64 float threshold = 2; 65 66 // Node children indexing into a contiguous 67 // vector of nodes starting from the root. 68 int32 left_id = 3; 69 int32 right_id = 4; 70} 71 72// Split rule for sparse float features defaulting left for missing features. 73message SparseFloatBinarySplitDefaultLeft { 74 DenseFloatBinarySplit split = 1; 75} 76 77// Split rule for sparse float features defaulting right for missing features. 78message SparseFloatBinarySplitDefaultRight { 79 DenseFloatBinarySplit split = 1; 80} 81 82// Split rule for categorical features with a single feature Id. 83message CategoricalIdBinarySplit { 84 // Categorical feature column and Id describing 85 // the rule feature == Id. 86 int32 feature_column = 1; 87 int64 feature_id = 2; 88 89 // Node children indexing into a contiguous 90 // vector of nodes starting from the root. 91 int32 left_id = 3; 92 int32 right_id = 4; 93} 94 95// Split rule for categorical features with a set of feature Ids. 96message CategoricalIdSetMembershipBinarySplit { 97 // Categorical feature column and Id describing 98 // the rule feature ∈ feature_ids. 99 int32 feature_column = 1; 100 // Sorted list of Ids in the set. 101 repeated int64 feature_ids = 2; 102 103 // Node children indexing into a contiguous 104 // vector of nodes starting from the root. 105 int32 left_id = 3; 106 int32 right_id = 4; 107} 108 109// Split rule for dense float features in the oblivious case. 110message ObliviousDenseFloatBinarySplit { 111 // Float feature column and split threshold describing 112 // the rule feature <= threshold. 
// Split rule for dense float features in the oblivious case.
message ObliviousDenseFloatBinarySplit {
  // Float feature column and split threshold describing
  // the rule feature <= threshold.
  int32 feature_column = 1;
  float threshold = 2;
  // We don't store children ids, because either the next node represents the
  // whole next layer of the tree or starting with the next node we only have
  // leaves.
}

// Split rule for categorical features with a single feature Id in the
// oblivious case.
message ObliviousCategoricalIdBinarySplit {
  // Categorical feature column and Id describing the rule feature == Id.
  int32 feature_column = 1;
  int64 feature_id = 2;
  // We don't store children ids, because either the next node represents the
  // whole next layer of the tree or starting with the next node we only have
  // leaves.
}

// DecisionTreeConfig describes a list of connected nodes.
// Node 0 must be the root and can carry any payload, including a leaf in the
// case of representing the bias.
// Note that each node id is implicitly its index in the list of nodes.
message DecisionTreeConfig {
  repeated TreeNode nodes = 1;
}

message DecisionTreeMetadata {
  // How many times the tree weight was updated (due to reweighting of the
  // final ensemble, dropout, shrinkage, etc.).
  int32 num_tree_weight_updates = 1;

  // Number of layers grown for this tree.
  int32 num_layers_grown = 2;

  // Whether the tree is finalized, i.e. no more layers can be grown.
  bool is_finalized = 3;
}

message GrowingMetadata {
  // Number of trees that we have attempted to build. After pruning, these
  // trees might have been removed.
  int64 num_trees_attempted = 1;
  // Number of layers that we have attempted to build. After pruning, these
  // layers might have been removed.
  int64 num_layers_attempted = 2;

  // Sorted list of column handlers that have been used in at least one split
  // so far.
  repeated int64 used_handler_ids = 3;
}

// DecisionTreeEnsembleConfig describes an ensemble of decision trees.
message DecisionTreeEnsembleConfig {
  repeated DecisionTreeConfig trees = 1;
  repeated float tree_weights = 2;
  repeated DecisionTreeMetadata tree_metadata = 3;

  // Metadata that is used during training.
  GrowingMetadata growing_metadata = 4;
}
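
// Illustrative example (proto text format) of a DecisionTreeEnsembleConfig:
// a single depth-1 tree with a dense split at the root (node 0) and two
// leaves at indices 1 and 2, matching the implicit id-as-index convention
// described on DecisionTreeConfig. All feature columns, thresholds, gains,
// weights, and leaf values are made-up placeholders.
//
//   trees {
//     nodes {
//       dense_float_binary_split {
//         feature_column: 0
//         threshold: 0.5
//         left_id: 1
//         right_id: 2
//       }
//       node_metadata { gain: 1.8 }
//     }
//     nodes { leaf { vector { value: -0.4 } } }
//     nodes { leaf { vector { value: 0.6 } } }
//   }
//   tree_weights: 0.1
//   tree_metadata { num_layers_grown: 1 is_finalized: true }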