1 // Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // ============================================================================= 15 #include "tensorflow/core/framework/common_shape_fns.h" 16 #include "tensorflow/core/framework/op.h" 17 #include "tensorflow/core/framework/resource_mgr.h" 18 #include "tensorflow/core/framework/shape_inference.h" 19 20 namespace tensorflow { 21 namespace boosted_trees { 22 using shape_inference::DimensionHandle; 23 using shape_inference::InferenceContext; 24 using shape_inference::ShapeHandle; 25 26 REGISTER_RESOURCE_HANDLE_OP(QuantileStreamResource); 27 28 REGISTER_OP("QuantileAccumulatorIsInitialized") 29 .Input("quantile_accumulator_handle: resource") 30 .Output("is_initialized: bool") 31 .SetShapeFn(shape_inference::ScalarShape) 32 .Doc(R"doc( 33 Checks whether a quantile accumulator has been initialized. 34 )doc"); 35 36 REGISTER_OP("CreateQuantileAccumulator") 37 .Attr("container: string = ''") 38 .Attr("shared_name: string = ''") 39 .Attr("max_elements: int = 1099511627776") // 1 << 40 40 .Attr("epsilon: float") 41 .Attr("num_quantiles: int") 42 .Attr("generate_quantiles: bool=False") 43 .Input("quantile_accumulator_handle: resource") 44 .Input("stamp_token: int64") __anonf41644880102(shape_inference::InferenceContext* c) 45 .SetShapeFn([](shape_inference::InferenceContext* c) { 46 shape_inference::ShapeHandle unused_input; 47 TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused_input)); 48 TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused_input)); 49 return Status::OK(); 50 }) 51 .Doc(R"doc( 52 Creates a stateful accumulator for quantile summaries. 53 54 epsilon: Error bound on the quantile summary. 55 num_quantiles: Number of buckets that we create from the data. 56 stamp_token: Token to use as the initial value of the resource stamp. 57 quantile_accumulator_handle: The handle to the accumulator. 58 )doc"); 59 60 REGISTER_OP("QuantileAccumulatorAddSummaries") 61 .Attr("num_resource_handles: int >= 1") 62 .Input("quantile_accumulator_handles: num_resource_handles * resource") 63 .Input("stamp_token: int64") 64 .Input("summaries: num_resource_handles * string") __anonf41644880202(InferenceContext* c) 65 .SetShapeFn([](InferenceContext* c) { 66 int num_resource_handles; 67 TF_RETURN_IF_ERROR( 68 c->GetAttr("num_resource_handles", &num_resource_handles)); 69 // All the inputs are scalars. 70 shape_inference::ShapeHandle unused_input; 71 for (int i = 0; i < 2 * num_resource_handles + 1; ++i) { 72 TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused_input)); 73 } 74 return Status::OK(); 75 }) 76 .Doc(R"doc( 77 Adds each quantile summary to its stream. 78 79 quantile_accumulator_handles: The handles to the quantile stream resources. 80 stamp_token: Stamp token to validate the Read/Write operation. 81 summaries: A list of serialized QuantileSummaryState. 82 )doc"); 83 84 REGISTER_OP("QuantileAccumulatorGetBuckets") 85 .Attr("num_resource_handles: int >= 1") 86 .Input("quantile_accumulator_handles: num_resource_handles * resource") 87 .Input("stamp_token: int64") 88 .Output("are_buckets_ready: num_resource_handles * bool") 89 .Output("buckets: num_resource_handles * float") __anonf41644880302(InferenceContext* c) 90 .SetShapeFn([](InferenceContext* c) { 91 int num_resource_handles; 92 TF_RETURN_IF_ERROR( 93 c->GetAttr("num_resource_handles", &num_resource_handles)); 94 for (int i = 0; i < num_resource_handles; ++i) { 95 c->set_output(i, c->Scalar()); 96 c->set_output(i + num_resource_handles, c->Vector(c->UnknownDim())); 97 } 98 return Status::OK(); 99 }) 100 101 .Doc(R"doc( 102 Returns quantile buckets created during previous flush of the accumulator. 103 104 quantile_accumulator_handles: The handles to the quantile stream resources. 105 stamp_token: Stamp token to validate the Read/Write operation. 106 are_buckets_ready: Whether the buckets are ready or not. 107 buckets: Output quantile summary representing boundaries with "num_quantile" 108 elements. 109 )doc"); 110 111 REGISTER_OP("QuantileAccumulatorFlush") 112 .Input("quantile_accumulator_handle: resource") 113 .Input("stamp_token: int64") 114 .Input("next_stamp_token: int64") 115 .Doc(R"doc( 116 Resets quantile summary streams for each column with a new token. 117 118 quantile_accumulator_handle: The handle to the accumulator. 119 stamp_token: Stamp token for Read/Write operations. 120 Any operation with a mismatching token will be dropped. 121 next_stamp_token: Stamp token to be used for the next iteration. 122 )doc"); 123 124 REGISTER_OP("QuantileAccumulatorFlushSummary") 125 .Input("quantile_accumulator_handle: resource") 126 .Input("stamp_token: int64") 127 .Input("next_stamp_token: int64") 128 .Output("output: string") 129 .Doc(R"doc( 130 Resets quantile summary stream and returns the summary. 131 132 quantile_accumulator_handle: The handle to the accumulator. 133 stamp_token: Stamp token for Read/Write operations. 134 Any operation with a mismatching token will be dropped. 135 next_stamp_token: Stamp token to be used for the next iteration. 136 output: A scalar string that is the a summary of the accumulator. 137 )doc"); 138 139 REGISTER_OP("QuantileAccumulatorSerialize") 140 .Input("quantile_accumulator_handle: resource") 141 .Output("stamp_token: int64") 142 .Output("stream_state: string") 143 .Output("are_buckets_ready: bool") 144 .Output("buckets: float") 145 .Doc(R"doc( 146 Serializes the state of the given resource. 147 148 quantile_accumulator_handle: The handle to the accumulator. 149 stamp_token: Stamp token for Read/Write operations. 150 Any operation with a mismatching token will be dropped. 151 stream_state: A serialized QuantileStreamState. 152 are_buckets_ready: Whether the buckets are ready or not. 153 buckets: Output quantile buckets representing boundaries with "num_quantile" 154 elements. 155 )doc"); 156 157 REGISTER_OP("QuantileAccumulatorDeserialize") 158 .Input("quantile_accumulator_handle: resource") 159 .Input("stamp_token: int64") 160 .Input("stream_state: string") 161 .Input("are_buckets_ready: bool") 162 .Input("buckets: float") 163 .Doc(R"doc( 164 Serializes the state of the given resource. 165 166 quantile_accumulator_handle: The handle to the accumulator. 167 stamp_token: Stamp token for Read/Write operations. 168 Any operation with a mismatching token will be dropped. 169 stream_state: A serialized QuantileStreamState. 170 are_buckets_ready: Whether the buckets are ready or not. 171 buckets: Output quantile summary representing boundaries with "num_quantile" 172 elements. 173 )doc"); 174 175 REGISTER_OP("MakeQuantileSummaries") 176 .Attr("num_dense_features: int >= 0") 177 .Attr("num_sparse_features: int >= 0") 178 .Attr("epsilon: float") 179 .Input("dense_float_features: num_dense_features * float") 180 .Input("sparse_float_feature_indices: num_sparse_features * int64") 181 .Input("sparse_float_feature_values: num_sparse_features * float") 182 .Input("sparse_float_feature_shapes: num_sparse_features * int64") 183 .Input("example_weights: float") 184 .Output("dense_summaries: num_dense_features * string") 185 .Output("sparse_summaries: num_sparse_features * string") __anonf41644880402(InferenceContext* c) 186 .SetShapeFn([](InferenceContext* c) { 187 int num_dense_features; 188 TF_RETURN_IF_ERROR(c->GetAttr("num_dense_features", &num_dense_features)); 189 int num_sparse_features; 190 TF_RETURN_IF_ERROR( 191 c->GetAttr("num_sparse_features", &num_sparse_features)); 192 ShapeHandle example_weights_shape; 193 int example_weights_index = num_dense_features + num_sparse_features * 3; 194 TF_RETURN_IF_ERROR(c->WithRank(c->input(example_weights_index), 2, 195 &example_weights_shape)); 196 for (int i = 0; i < num_dense_features; ++i) { 197 ShapeHandle dense_feature_shape; 198 DimensionHandle unused_dim; 199 TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 2, &dense_feature_shape)); 200 TF_RETURN_IF_ERROR(c->Merge(c->Dim(dense_feature_shape, 0), 201 c->Dim(example_weights_shape, 0), 202 &unused_dim)); 203 c->set_output(i, c->Scalar()); 204 } 205 for (int i = 0; i < num_sparse_features; ++i) { 206 c->set_output(i + num_dense_features, c->Scalar()); 207 } 208 return Status::OK(); 209 }) 210 .Doc(R"doc( 211 Creates a summary for the given features. 212 213 num_dense_features: Number of dense feature groups to compute quantiles on. 214 num_sparse_features: Number of sparse feature groups to compute quantiles on. 215 epsilon: Error bound on the computed summary. 216 dense_float_features: A list of vectors which contains dense values. 217 sparse_float_feature_indices: List of rank 2 tensors containing the sparse float 218 feature indices. 219 sparse_float_feature_values: List of rank 1 tensors containing the sparse float 220 feature values. 221 sparse_float_feature_shapes: List of rank 1 tensors containing the shape of the 222 float feature. 223 example_weights: Rank 2 (N, 1) tensor of per-example weights. Should match 224 dense and sparse features shape. 225 dense_summaries: A list of serialized QuantileSummaryState for dense columns. 226 sparse_summaries: A list of serialized QuantileSummaryState for sparse columns. 227 )doc"); 228 229 REGISTER_OP("QuantileBuckets") 230 .Attr("num_dense_features: int >= 0") 231 .Attr("num_sparse_features: int >= 0") 232 .Attr("dense_config: list(string)") 233 .Attr("sparse_config: list(string)") 234 .Input("dense_float_features: num_dense_features * float") 235 .Input("sparse_float_feature_indices: num_sparse_features * int64") 236 .Input("sparse_float_feature_values: num_sparse_features * float") 237 .Input("sparse_float_feature_shapes: num_sparse_features * int64") 238 .Input("example_weights: float") 239 .Output("dense_buckets: num_dense_features * float") 240 .Output("sparse_buckets: num_sparse_features * float") 241 .Doc(R"doc( 242 Computes quantile buckets for a given list of dense and sparse features with 243 given example weights. 244 245 num_dense_features: Number of dense feature groups to compute quantiles on. 246 num_sparse_features: Number of sparse feature groups to compute quantiles on. 247 dense_config: Config for computing buckets for dense values. 248 Each entry is QuantileConfig proto. 249 sparse_config: Config for computing buckets for sparse feature values. 250 Each entry is QuantileConfig proto. 251 dense_float_features: A list of vectors which contains dense values. 252 sparse_float_feature_indices: List of rank 2 tensors containing the sparse float 253 feature indices. 254 sparse_float_feature_values: List of rank 1 tensors containing the sparse float 255 feature values. 256 sparse_float_feature_shapes: List of rank 1 tensors containing the shape of the 257 float feature. 258 example_weights: Rank 1 tensor containing the example weight tensor. 259 dense_buckets: Output quantile summary for each dense float tensor 260 representing boundaries each with "num_quantile" elements. 261 sparse_buckets: Output quantile summary for each sparse float value tensor 262 representing boundaries each with "num_quantile" elements. 263 )doc"); 264 265 REGISTER_OP("Quantiles") 266 .Attr("num_dense_features: int >= 0") 267 .Attr("num_sparse_features: int >= 0") 268 .Input("dense_values: num_dense_features * float") 269 .Input("sparse_values: num_sparse_features * float") 270 .Input("dense_buckets: num_dense_features * float") 271 .Input("sparse_buckets: num_sparse_features * float") 272 .Input("sparse_indices: num_sparse_features * int64") 273 .Output("dense_quantiles: num_dense_features * int32") 274 .Output("sparse_quantiles: num_sparse_features * int32") __anonf41644880502(InferenceContext* c) 275 .SetShapeFn([](InferenceContext* c) { 276 int num_dense_features; 277 TF_RETURN_IF_ERROR(c->GetAttr("num_dense_features", &num_dense_features)); 278 int num_sparse_features; 279 TF_RETURN_IF_ERROR( 280 c->GetAttr("num_sparse_features", &num_sparse_features)); 281 // Set output shapes (dense_quantiles and sparse_quantiles) by the 282 // relevant inputs (dense_values and sparse_values). Note that the output 283 // has an additional dimension for dimension_ids. 284 for (int i = 0; i < num_dense_features + num_sparse_features; ++i) { 285 c->set_output(i, c->MakeShape({c->Dim(c->input(i), 0), 2})); 286 } 287 return Status::OK(); 288 }) 289 .Doc(R"doc( 290 Computes quantile for each a given list of dense and sparse feature values using 291 the given buckets. 292 293 num_dense_features: Number of dense feature groups to generate quantiles for. 294 num_sparse_features: Number of sparse feature groups to generate quantiles for. 295 dense_values: List of rank 1 tensors containing the dense values. 296 sparse_values: List of rank 1 tensors containing the sparse feature values. 297 dense_buckets: Quantile summary for each of the dense float tensor. 298 sparse_buckets: Quantile summary for each of the sparse feature float tensor. 299 sparse_indices: List of rank 2 tensors with indices for sparse float 300 tensors. 301 dense_quantiles: Rank 2 tensors representing associated quantiles for each of 302 dense float tensors and the dimension. 303 sparse_quantiles: Rank 2 tensors representing associated quantiles for each of 304 the sparse feature tensors for each of sparse feature dimensions: 305 [quantile id, dimension id]. 306 )doc"); 307 308 REGISTER_OP("BucketizeWithInputBoundaries") 309 .Input("input: T") 310 .Input("boundaries: float") 311 .Output("output: int32") 312 .Attr("T: {int32, int64, float, double}") 313 .SetShapeFn(shape_inference::UnchangedShape) 314 .Doc(R"doc( 315 Bucketizes 'input' based on 'boundaries'. This function is similar to Bucketize 316 op in core math_ops, except that boundaries are specified using an input tensor, 317 as compared with a fixed attribute in Bucketize(). 318 319 For example, if the inputs are 320 boundaries = [0, 10, 100] 321 input = [[-5, 10000] 322 [150, 10] 323 [5, 100]] 324 325 then the output will be 326 output = [[0, 3] 327 [3, 2] 328 [1, 3]] 329 330 input: Any shape of Tensor contains with numeric type. 331 boundaries: A vector Tensor of sorted floats specifies the boundaries 332 of the buckets. 333 output: Same shape as 'input', where each value of input is replaced with its corresponding bucket index. 334 )doc"); 335 336 } // namespace boosted_trees 337 } // namespace tensorflow 338