// Generic representation of tree-based models.

// This proto establishes a shared standard: "fully compatible" projects should
// provide support for all reasonable models expressed through it. Therefore,
// it should be kept as simple as possible, and should never contain
// project-specific design choices.

// Status: work in progress. This proto can change anytime without notice.

syntax = "proto3";

package tensorflow.decision_trees;

import "google/protobuf/any.proto";
import "google/protobuf/wrappers.proto";

option cc_enable_arenas = true;

// A generic handle for any type of model. Exactly one member of the `model`
// oneof is expected to be set.
message Model {
  oneof model {
    // A single decision tree.
    DecisionTree decision_tree = 1;
    // A collection of sub-models whose outputs are combined.
    Ensemble ensemble = 2;
    // A model type that is not described by this schema.
    google.protobuf.Any custom_model = 3;
  }
  // Opaque extra payloads; their interpretation is not defined by this schema.
  repeated google.protobuf.Any additional_data = 4;
}

// A model bundled with descriptions of the features it refers to.
message ModelAndFeatures {
  // Description of a single feature.
  message Feature {
    // TODO(jonasz): Remove this field, as it's confusing. Ctx: cr/153569450.
    FeatureId feature_id = 1 [deprecated = true];
    // Opaque extra payloads describing the feature.
    repeated google.protobuf.Any additional_data = 2;
  }
  // Given a FeatureId feature_id, the feature's description is in
  // features[feature_id.id.value].
  map<string, Feature> features = 1;
  // The model that uses the features described above.
  Model model = 2;
  // Opaque extra payloads; their interpretation is not defined by this schema.
  repeated google.protobuf.Any additional_data = 3;
}

// An ordered sequence of models. This message can be used to express bagged or
// boosted models, as well as custom ensembles.
message Ensemble {
  // One entry of the ensemble.
  message Member {
    // The model held by this member.
    Model submodel = 1;
    // Identifier of the member. A wrapper type is used, so "unset" can be
    // told apart from 0.
    google.protobuf.Int32Value submodel_id = 2;
    // Opaque extra payloads attached to this member.
    repeated google.protobuf.Any additional_data = 3;
  }
  repeated Member members = 100;  // A higher id for more readable printing.

  // The presence of a certain combination_technique indicates how to combine
  // the outputs of member models in order to compute the ensemble's output.
  oneof combination_technique {
    Summation summation_combination_technique = 1;
    Averaging averaging_combination_technique = 2;
    // A combination rule that is not described by this schema.
    google.protobuf.Any custom_combination_technique = 3;
  }
  // Opaque extra payloads; their interpretation is not defined by this schema.
  repeated google.protobuf.Any additional_data = 4;
}

// When present, the Ensemble's output is the sum of member models' outputs.
message Summation {
  // Opaque extra payloads; their interpretation is not defined by this schema.
  repeated google.protobuf.Any additional_data = 1;
}

// When present, the Ensemble's output is the average of member models' outputs.
message Averaging {
  // Opaque extra payloads; their interpretation is not defined by this schema.
  repeated google.protobuf.Any additional_data = 1;
}

// A decision tree, stored as a flat list of nodes.
message DecisionTree {
  // The nodes of the tree.
  // NOTE(review): this file does not state the required ordering of nodes or
  // which node is the root - confirm against readers/writers.
  repeated TreeNode nodes = 1;
  // Opaque extra payloads; their interpretation is not defined by this schema.
  repeated google.protobuf.Any additional_data = 2;
}

// A single node of a DecisionTree: either an internal binary split, a leaf, or
// a custom node type.
message TreeNode {
  // Following fields are provided for convenience and better readability.
  // Filling them in is not required.
  google.protobuf.Int32Value node_id = 1;
  google.protobuf.Int32Value depth = 2;
  google.protobuf.Int32Value subtree_size = 3;

  oneof node_type {
    // An internal node with a left and a right child.
    BinaryNode binary_node = 4;
    // A terminal node holding a prediction.
    Leaf leaf = 5;
    // A node type that is not described by this schema.
    google.protobuf.Any custom_node_type = 6;
  }

  // Opaque extra payloads; their interpretation is not defined by this schema.
  repeated google.protobuf.Any additional_data = 7;
}

// An internal node that routes a datapoint to its left or right child.
message BinaryNode {
  // Identifiers of the two children.
  // NOTE(review): this file does not say whether these refer to
  // TreeNode.node_id or to an index into DecisionTree.nodes - confirm.
  google.protobuf.Int32Value left_child_id = 1;
  google.protobuf.Int32Value right_child_id = 2;
  // Which child to follow. LEFT is the zero value, so in proto3 an unset
  // Direction field reads as LEFT.
  enum Direction {
    LEFT = 0;
    RIGHT = 1;
  }
  // When left_child_test is undefined for a particular datapoint (e.g. because
  // it's not defined when feature value is missing), the datapoint should go
  // in this direction.
  Direction default_direction = 3;
  // When a datapoint satisfies the test, it should be propagated to the left
  // child.
  oneof left_child_test {
    InequalityTest inequality_left_child_test = 4;
    // A test that is not described by this schema.
    google.protobuf.Any custom_left_child_test = 5;
  }
}

// A SparseVector represents a vector in which only certain select elements
// are non-zero. Maps labels to values (e.g. class id to probability or count).
message SparseVector {
  // Label -> value entries. Iteration order of a proto map is unspecified.
  map<int64, Value> sparse_value = 1;
}

// A dense, ordered list of values.
message Vector {
  repeated Value value = 1;
}

// A terminal node of a decision tree, holding the prediction.
message Leaf {
  oneof leaf {
    // The interpretation of the values held in the leaves of a decision tree
    // is application specific, but some common cases are:
    // 1) len(vector) = 1, and the floating point value[0] holds the class 0
    //    probability in a two class classification problem.
    // 2) len(vector) = 1, and the integer value[0] holds the class prediction.
    // 3) The floating point value[i] holds the class i probability prediction.
    // 4) The floating point value[i] holds the i-th component of the
    //    vector prediction in a regression problem.
    // 5) sparse_vector holds the sparse class predictions for a classification
    //    problem with a large number of classes.
    Vector vector = 1;
    SparseVector sparse_vector = 2;
  }
  // For non-standard handling of leaves.
  repeated google.protobuf.Any additional_data = 3;
}

// Identifies a feature by a string id.
message FeatureId {
  // The feature's identifier; also the key used to look the feature up in
  // ModelAndFeatures.features.
  google.protobuf.StringValue id = 1;
  // Opaque extra payloads; their interpretation is not defined by this schema.
  repeated google.protobuf.Any additional_data = 2;
}

// A weighted combination of several features.
message ObliqueFeatures {
  // total value is sum(features[i] * weights[i]).
  // The two lists are matched up by index.
  // NOTE(review): presumably they must have the same length - nothing in this
  // schema enforces it; confirm how readers treat a mismatch.
  repeated FeatureId features = 1;
  repeated float weights = 2;
}

// A comparison of a feature-derived value against a threshold.
message InequalityTest {
  // When the feature is missing, the test's outcome is undefined.
  // (The oneof name is kept as-is: renaming it would change generated code.)
  oneof FeatureSum {
    // Test a single feature.
    FeatureId feature_id = 1;
    // Test a weighted sum of features.
    ObliqueFeatures oblique = 4;
  }
  // The comparison operator. LESS_OR_EQUAL is the zero value, so in proto3 an
  // unset `type` reads as LESS_OR_EQUAL.
  // NOTE(review): presumably the test is "<feature value> <type> <threshold>"
  // - confirm operand order against the evaluators.
  enum Type {
    LESS_OR_EQUAL = 0;
    LESS_THAN = 1;
    GREATER_OR_EQUAL = 2;
    GREATER_THAN = 3;
  }
  Type type = 2;
  // The value the feature (or feature sum) is compared against.
  Value threshold = 3;
}

// Represents a single value, e.g. 5 or 2.5. Only numeric types have a
// dedicated member; any other kind of value (for instance a string such as
// "abc") has to be carried in custom_value.
message Value {
  oneof value {
    float float_value = 1;
    double double_value = 2;
    int32 int32_value = 3;
    int64 int64_value = 4;
    // A value of a type that is not described by this schema.
    google.protobuf.Any custom_value = 5;
  }
}