• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // =============================================================================
15 #include "tensorflow/core/framework/common_shape_fns.h"
16 #include "tensorflow/core/framework/op.h"
17 #include "tensorflow/core/framework/resource_mgr.h"
18 #include "tensorflow/core/framework/shape_inference.h"
19 
20 namespace tensorflow {
21 namespace boosted_trees {
22 using shape_inference::DimensionHandle;
23 using shape_inference::InferenceContext;
24 using shape_inference::ShapeHandle;
25 
// Registers the resource-handle op for the QuantileStreamResource type.
// NOTE(review): REGISTER_RESOURCE_HANDLE_OP is presumably the macro supplied by
// tensorflow/core/framework/resource_mgr.h (included above) -- confirm.
26 REGISTER_RESOURCE_HANDLE_OP(QuantileStreamResource);
27 
28 REGISTER_OP("QuantileAccumulatorIsInitialized")
29     .Input("quantile_accumulator_handle: resource")
30     .Output("is_initialized: bool")
31     .SetShapeFn(shape_inference::ScalarShape)
32     .Doc(R"doc(
33 Checks whether a quantile accumulator has been initialized.
34 )doc");
35 
36 REGISTER_OP("CreateQuantileAccumulator")
37     .Attr("container: string = ''")
38     .Attr("shared_name: string = ''")
39     .Attr("max_elements: int = 1099511627776")  // 1 << 40
40     .Attr("epsilon: float")
41     .Attr("num_quantiles: int")
42     .Attr("generate_quantiles: bool=False")
43     .Input("quantile_accumulator_handle: resource")
44     .Input("stamp_token: int64")
__anonf41644880102(shape_inference::InferenceContext* c) 45     .SetShapeFn([](shape_inference::InferenceContext* c) {
46       shape_inference::ShapeHandle unused_input;
47       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused_input));
48       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused_input));
49       return Status::OK();
50     })
51     .Doc(R"doc(
52 Creates a stateful accumulator for quantile summaries.
53 
54 epsilon: Error bound on the quantile summary.
55 num_quantiles: Number of buckets that we create from the data.
56 stamp_token: Token to use as the initial value of the resource stamp.
57 quantile_accumulator_handle: The handle to the accumulator.
58 )doc");
59 
60 REGISTER_OP("QuantileAccumulatorAddSummaries")
61     .Attr("num_resource_handles: int >= 1")
62     .Input("quantile_accumulator_handles: num_resource_handles * resource")
63     .Input("stamp_token: int64")
64     .Input("summaries: num_resource_handles * string")
__anonf41644880202(InferenceContext* c) 65     .SetShapeFn([](InferenceContext* c) {
66       int num_resource_handles;
67       TF_RETURN_IF_ERROR(
68           c->GetAttr("num_resource_handles", &num_resource_handles));
69       // All the inputs are scalars.
70       shape_inference::ShapeHandle unused_input;
71       for (int i = 0; i < 2 * num_resource_handles + 1; ++i) {
72         TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused_input));
73       }
74       return Status::OK();
75     })
76     .Doc(R"doc(
77 Adds each quantile summary to its stream.
78 
79 quantile_accumulator_handles: The handles to the quantile stream resources.
80 stamp_token: Stamp token to validate the Read/Write operation.
81 summaries: A list of serialized QuantileSummaryState.
82 )doc");
83 
84 REGISTER_OP("QuantileAccumulatorGetBuckets")
85     .Attr("num_resource_handles: int >= 1")
86     .Input("quantile_accumulator_handles: num_resource_handles * resource")
87     .Input("stamp_token: int64")
88     .Output("are_buckets_ready: num_resource_handles * bool")
89     .Output("buckets: num_resource_handles * float")
__anonf41644880302(InferenceContext* c) 90     .SetShapeFn([](InferenceContext* c) {
91       int num_resource_handles;
92       TF_RETURN_IF_ERROR(
93           c->GetAttr("num_resource_handles", &num_resource_handles));
94       for (int i = 0; i < num_resource_handles; ++i) {
95         c->set_output(i, c->Scalar());
96         c->set_output(i + num_resource_handles, c->Vector(c->UnknownDim()));
97       }
98       return Status::OK();
99     })
100 
101     .Doc(R"doc(
102 Returns quantile buckets created during previous flush of the accumulator.
103 
104 quantile_accumulator_handles: The handles to the quantile stream resources.
105 stamp_token: Stamp token to validate the Read/Write operation.
106 are_buckets_ready: Whether the buckets are ready or not.
107 buckets: Output quantile summary representing boundaries with "num_quantile"
108     elements.
109 )doc");
110 
111 REGISTER_OP("QuantileAccumulatorFlush")
112     .Input("quantile_accumulator_handle: resource")
113     .Input("stamp_token: int64")
114     .Input("next_stamp_token: int64")
115     .Doc(R"doc(
116 Resets quantile summary streams for each column with a new token.
117 
118 quantile_accumulator_handle: The handle to the accumulator.
119 stamp_token: Stamp token for Read/Write operations.
120              Any operation with a mismatching token will be dropped.
121 next_stamp_token: Stamp token to be used for the next iteration.
122 )doc");
123 
124 REGISTER_OP("QuantileAccumulatorFlushSummary")
125     .Input("quantile_accumulator_handle: resource")
126     .Input("stamp_token: int64")
127     .Input("next_stamp_token: int64")
128     .Output("output: string")
129     .Doc(R"doc(
130 Resets quantile summary stream and returns the summary.
131 
132 quantile_accumulator_handle: The handle to the accumulator.
133 stamp_token: Stamp token for Read/Write operations.
134              Any operation with a mismatching token will be dropped.
135 next_stamp_token: Stamp token to be used for the next iteration.
136 output: A scalar string that is the a summary of the accumulator.
137 )doc");
138 
139 REGISTER_OP("QuantileAccumulatorSerialize")
140     .Input("quantile_accumulator_handle: resource")
141     .Output("stamp_token: int64")
142     .Output("stream_state: string")
143     .Output("are_buckets_ready: bool")
144     .Output("buckets: float")
145     .Doc(R"doc(
146 Serializes the state of the given resource.
147 
148 quantile_accumulator_handle: The handle to the accumulator.
149 stamp_token: Stamp token for Read/Write operations.
150              Any operation with a mismatching token will be dropped.
151 stream_state: A serialized QuantileStreamState.
152 are_buckets_ready: Whether the buckets are ready or not.
153 buckets: Output quantile buckets representing boundaries with "num_quantile"
154     elements.
155 )doc");
156 
157 REGISTER_OP("QuantileAccumulatorDeserialize")
158     .Input("quantile_accumulator_handle: resource")
159     .Input("stamp_token: int64")
160     .Input("stream_state: string")
161     .Input("are_buckets_ready: bool")
162     .Input("buckets: float")
163     .Doc(R"doc(
164 Serializes the state of the given resource.
165 
166 quantile_accumulator_handle: The handle to the accumulator.
167 stamp_token: Stamp token for Read/Write operations.
168              Any operation with a mismatching token will be dropped.
169 stream_state: A serialized QuantileStreamState.
170 are_buckets_ready: Whether the buckets are ready or not.
171 buckets: Output quantile summary representing boundaries with "num_quantile"
172     elements.
173 )doc");
174 
175 REGISTER_OP("MakeQuantileSummaries")
176     .Attr("num_dense_features: int >= 0")
177     .Attr("num_sparse_features: int >= 0")
178     .Attr("epsilon: float")
179     .Input("dense_float_features: num_dense_features * float")
180     .Input("sparse_float_feature_indices: num_sparse_features * int64")
181     .Input("sparse_float_feature_values: num_sparse_features * float")
182     .Input("sparse_float_feature_shapes: num_sparse_features * int64")
183     .Input("example_weights: float")
184     .Output("dense_summaries: num_dense_features * string")
185     .Output("sparse_summaries: num_sparse_features * string")
__anonf41644880402(InferenceContext* c) 186     .SetShapeFn([](InferenceContext* c) {
187       int num_dense_features;
188       TF_RETURN_IF_ERROR(c->GetAttr("num_dense_features", &num_dense_features));
189       int num_sparse_features;
190       TF_RETURN_IF_ERROR(
191           c->GetAttr("num_sparse_features", &num_sparse_features));
192       ShapeHandle example_weights_shape;
193       int example_weights_index = num_dense_features + num_sparse_features * 3;
194       TF_RETURN_IF_ERROR(c->WithRank(c->input(example_weights_index), 2,
195                                      &example_weights_shape));
196       for (int i = 0; i < num_dense_features; ++i) {
197         ShapeHandle dense_feature_shape;
198         DimensionHandle unused_dim;
199         TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 2, &dense_feature_shape));
200         TF_RETURN_IF_ERROR(c->Merge(c->Dim(dense_feature_shape, 0),
201                                     c->Dim(example_weights_shape, 0),
202                                     &unused_dim));
203         c->set_output(i, c->Scalar());
204       }
205       for (int i = 0; i < num_sparse_features; ++i) {
206         c->set_output(i + num_dense_features, c->Scalar());
207       }
208       return Status::OK();
209     })
210     .Doc(R"doc(
211 Creates a summary for the given features.
212 
213 num_dense_features: Number of dense feature groups to compute quantiles on.
214 num_sparse_features: Number of sparse feature groups to compute quantiles on.
215 epsilon: Error bound on the computed summary.
216 dense_float_features: A list of vectors which contains dense values.
217 sparse_float_feature_indices: List of rank 2 tensors containing the sparse float
218 feature indices.
219 sparse_float_feature_values: List of rank 1 tensors containing the sparse float
220 feature values.
221 sparse_float_feature_shapes: List of rank 1 tensors containing the shape of the
222 float feature.
223 example_weights: Rank 2 (N, 1) tensor of per-example weights. Should match
224     dense and sparse features shape.
225 dense_summaries: A list of serialized QuantileSummaryState for dense columns.
226 sparse_summaries: A list of serialized QuantileSummaryState for sparse columns.
227 )doc");
228 
229 REGISTER_OP("QuantileBuckets")
230     .Attr("num_dense_features: int >= 0")
231     .Attr("num_sparse_features: int >= 0")
232     .Attr("dense_config: list(string)")
233     .Attr("sparse_config: list(string)")
234     .Input("dense_float_features: num_dense_features * float")
235     .Input("sparse_float_feature_indices: num_sparse_features * int64")
236     .Input("sparse_float_feature_values: num_sparse_features * float")
237     .Input("sparse_float_feature_shapes: num_sparse_features * int64")
238     .Input("example_weights: float")
239     .Output("dense_buckets: num_dense_features * float")
240     .Output("sparse_buckets: num_sparse_features * float")
241     .Doc(R"doc(
242 Computes quantile buckets for a given list of dense and sparse features with
243 given example weights.
244 
245 num_dense_features: Number of dense feature groups to compute quantiles on.
246 num_sparse_features: Number of sparse feature groups to compute quantiles on.
247 dense_config: Config for computing buckets for dense values.
248 Each entry is QuantileConfig proto.
249 sparse_config: Config for computing buckets for sparse feature values.
250 Each entry is QuantileConfig proto.
251 dense_float_features: A list of vectors which contains dense values.
252 sparse_float_feature_indices: List of rank 2 tensors containing the sparse float
253 feature indices.
254 sparse_float_feature_values: List of rank 1 tensors containing the sparse float
255 feature values.
256 sparse_float_feature_shapes: List of rank 1 tensors containing the shape of the
257 float feature.
258 example_weights: Rank 1 tensor containing the example weight tensor.
259 dense_buckets: Output quantile summary for each dense float tensor
260 representing boundaries each with "num_quantile" elements.
261 sparse_buckets: Output quantile summary for each sparse float value tensor
262 representing boundaries each with "num_quantile" elements.
263 )doc");
264 
265 REGISTER_OP("Quantiles")
266     .Attr("num_dense_features: int >= 0")
267     .Attr("num_sparse_features: int >= 0")
268     .Input("dense_values: num_dense_features * float")
269     .Input("sparse_values: num_sparse_features * float")
270     .Input("dense_buckets: num_dense_features * float")
271     .Input("sparse_buckets: num_sparse_features * float")
272     .Input("sparse_indices: num_sparse_features * int64")
273     .Output("dense_quantiles: num_dense_features * int32")
274     .Output("sparse_quantiles: num_sparse_features * int32")
__anonf41644880502(InferenceContext* c) 275     .SetShapeFn([](InferenceContext* c) {
276       int num_dense_features;
277       TF_RETURN_IF_ERROR(c->GetAttr("num_dense_features", &num_dense_features));
278       int num_sparse_features;
279       TF_RETURN_IF_ERROR(
280           c->GetAttr("num_sparse_features", &num_sparse_features));
281       // Set output shapes (dense_quantiles and sparse_quantiles) by the
282       // relevant inputs (dense_values and sparse_values). Note that the output
283       // has an additional dimension for dimension_ids.
284       for (int i = 0; i < num_dense_features + num_sparse_features; ++i) {
285         c->set_output(i, c->MakeShape({c->Dim(c->input(i), 0), 2}));
286       }
287       return Status::OK();
288     })
289     .Doc(R"doc(
290 Computes quantile for each a given list of dense and sparse feature values using
291 the given buckets.
292 
293 num_dense_features: Number of dense feature groups to generate quantiles for.
294 num_sparse_features: Number of sparse feature groups to generate quantiles for.
295 dense_values: List of rank 1 tensors containing the dense values.
296 sparse_values: List of rank 1 tensors containing the sparse feature values.
297 dense_buckets: Quantile summary for each of the dense float tensor.
298 sparse_buckets: Quantile summary for each of the sparse feature float tensor.
299 sparse_indices: List of rank 2 tensors with indices for sparse float
300 tensors.
301 dense_quantiles: Rank 2 tensors representing associated quantiles for each of
302 dense float tensors and the dimension.
303 sparse_quantiles: Rank 2 tensors representing associated quantiles for each of
304 the sparse feature tensors for each of sparse feature dimensions:
305 [quantile id, dimension id].
306 )doc");
307 
308 REGISTER_OP("BucketizeWithInputBoundaries")
309     .Input("input: T")
310     .Input("boundaries: float")
311     .Output("output: int32")
312     .Attr("T: {int32, int64, float, double}")
313     .SetShapeFn(shape_inference::UnchangedShape)
314     .Doc(R"doc(
315 Bucketizes 'input' based on 'boundaries'. This function is similar to Bucketize
316 op in core math_ops, except that boundaries are specified using an input tensor,
317 as compared with a fixed attribute in Bucketize().
318 
319 For example, if the inputs are
320     boundaries = [0, 10, 100]
321     input = [[-5, 10000]
322              [150,   10]
323              [5,    100]]
324 
325 then the output will be
326     output = [[0, 3]
327               [3, 2]
328               [1, 3]]
329 
330 input: Any shape of Tensor contains with numeric type.
331 boundaries: A vector Tensor of sorted floats specifies the boundaries
332 of the buckets.
333 output: Same shape as 'input', where each value of input is replaced with its corresponding bucket index.
334 )doc");
335 
336 }  // namespace boosted_trees
337 }  // namespace tensorflow
338