• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""bincount ops."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21from tensorflow.python.framework import constant_op
22from tensorflow.python.framework import dtypes
23from tensorflow.python.framework import ops
24from tensorflow.python.framework import sparse_tensor
25from tensorflow.python.ops import array_ops
26from tensorflow.python.ops import check_ops
27from tensorflow.python.ops import gen_count_ops
28from tensorflow.python.ops import gen_math_ops
29from tensorflow.python.ops import math_ops
30from tensorflow.python.ops.ragged import ragged_tensor
31from tensorflow.python.util import deprecation
32from tensorflow.python.util.tf_export import tf_export
33
34
@tf_export("math.bincount", v1=[])
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32,
             name=None,
             axis=None,
             binary_output=False):
  """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  ```
  The largest element of `values` is 5, so the output vector has length
  5 + 1 = 6.

  Each bin value in the output indicates number of occurrences of the particular
  index. Here, index 1 in output has a value 2. This indicates value 1 occurs
  two times in `values`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  ```
  Bin will be incremented by the corresponding weight instead of 1.
  Here, index 1 in output has a value 6. This is the summation of weights
  corresponding to the value in `values`.

  **Bin-counting on a certain axis**

  This example takes a 2 dimensional input and returns a `Tensor` with
  bincounting on each sample.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [2, 1, 1, 0]], dtype=int32)>


  **Bin-counting with binary_output**

  This example gives binary output instead of counting the occurrence.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 0]], dtype=int32)>

  Args:
    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be counted.
      These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
    `ValueError` if both `weights` and `binary_output` are given, or if `axis`
      is not one of `None`, `0` or `-1`.

  """
  name = "bincount" if name is None else name
  with ops.name_scope(name):
    # SparseTensor / RaggedTensor inputs cannot go through
    # `ops.convert_to_tensor`, so they must skip the legacy dense-only path
    # below and use the general path (where `axis=None` is treated as `0`).
    is_composite = isinstance(
        arr, (sparse_tensor.SparseTensor, ragged_tensor.RaggedTensor))

    # Somehow forward compatible needs to be False.
    if not binary_output and axis is None and not is_composite:
      arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
      # Multiplying by the (0/1) non-empty flag forces size 0 for empty input.
      output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * (
          math_ops.reduce_max(arr) + 1)
      if minlength is not None:
        minlength = ops.convert_to_tensor(
            minlength, name="minlength", dtype=dtypes.int32)
        output_size = gen_math_ops.maximum(minlength, output_size)
      if maxlength is not None:
        maxlength = ops.convert_to_tensor(
            maxlength, name="maxlength", dtype=dtypes.int32)
        output_size = gen_math_ops.minimum(maxlength, output_size)
      if weights is not None:
        weights = ops.convert_to_tensor(weights, name="weights")
        return gen_math_ops.unsorted_segment_sum(weights, arr, output_size)
      # An empty weights tensor carries only the requested output dtype.
      weights = constant_op.constant([], dtype)
      return gen_math_ops.bincount(arr, output_size, weights)

    if not isinstance(arr, sparse_tensor.SparseTensor):
      arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr, name="arr")
    if weights is not None:
      if not isinstance(weights, sparse_tensor.SparseTensor):
        weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("binary_output and weights are mutually exclusive.")

    if not arr.dtype.is_integer:
      arr = math_ops.cast(arr, dtypes.int32)
    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError("Unsupported axis value %s. Only 0 and -1 are currently "
                       "supported." % axis)

    if isinstance(arr, sparse_tensor.SparseTensor):
      # Emptiness is decided by the stored values rather than the dense shape:
      # a SparseTensor with a non-zero dense shape may hold no values at all,
      # in which case the reduce_max below must not contribute to the size.
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr.values)) > 0
      max_value = math_ops.reduce_max(arr.values)
    elif isinstance(arr, ragged_tensor.RaggedTensor):
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr.values)) > 0
      max_value = math_ops.reduce_max(arr)
    else:
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
      max_value = math_ops.reduce_max(arr)
    output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (max_value + 1)

    if minlength is not None:
      minlength = ops.convert_to_tensor(
          minlength, name="minlength", dtype=arr.dtype)
      output_size = gen_math_ops.maximum(minlength, output_size)
    if maxlength is not None:
      maxlength = ops.convert_to_tensor(
          maxlength, name="maxlength", dtype=arr.dtype)
      output_size = gen_math_ops.minimum(maxlength, output_size)

    if axis == 0:
      # Flatten everything to a 1-D dense tensor of values (and weights); the
      # dense branch below then performs the count.
      if isinstance(arr, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(arr, weights, dtype)
        arr = arr.values
      elif isinstance(arr, ragged_tensor.RaggedTensor):
        if weights is not None:
          weights = validate_ragged_weights(arr, weights, dtype)
        arr = arr.values
      else:
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        arr = array_ops.reshape(arr, [-1])

    # Dispatch on the (possibly flattened) input type. Each validator either
    # checks the supplied weights or builds an empty tensor of `dtype`.
    if isinstance(arr, sparse_tensor.SparseTensor):
      weights = validate_sparse_weights(arr, weights, dtype)
      return gen_math_ops.sparse_bincount(
          indices=arr.indices,
          values=arr.values,
          dense_shape=arr.dense_shape,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
    elif isinstance(arr, ragged_tensor.RaggedTensor):
      weights = validate_ragged_weights(arr, weights, dtype)
      return gen_math_ops.ragged_bincount(
          splits=arr.row_splits,
          values=arr.values,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
    else:
      weights = validate_dense_weights(arr, weights, dtype)
      return gen_math_ops.dense_bincount(
          input=arr,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
219
220
@tf_export(v1=["math.bincount", "bincount"])
@deprecation.deprecated_endpoints("bincount")
def bincount_v1(arr,
                weights=None,
                minlength=None,
                maxlength=None,
                dtype=dtypes.int32):
  """Counts the number of occurrences of each value in an integer array.

  This is the TF1 endpoint; it forwards its arguments unchanged to `bincount`.

  When neither `minlength` nor `maxlength` is supplied, the result is a vector
  of length `tf.reduce_max(arr) + 1` for non-empty `arr` and of length 0 for
  empty `arr`. When `weights` is supplied, entry `i` of the result is the sum
  of the `weights` entries at the positions where `arr` equals `i`.

  Args:
    arr: An int32 tensor of non-negative values.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.
  """
  return bincount(
      arr,
      weights=weights,
      minlength=minlength,
      maxlength=maxlength,
      dtype=dtype)
252
253
@tf_export("sparse.bincount")
def sparse_bincount(values,
                    weights=None,
                    axis=0,
                    minlength=None,
                    maxlength=None,
                    binary_output=False,
                    name=None):
  """Count the number of times an integer value appears in a tensor.

  This op takes an N-dimensional `Tensor`, `RaggedTensor`, or `SparseTensor`,
  and returns an N-dimensional int64 SparseTensor where element
  `[i0...i[axis], j]` contains the number of times the value `j` appears in
  slice `[i0...i[axis], :]` of the input tensor.  Currently, only `axis=0` and
  `axis=-1` are supported.

  Args:
    values: A Tensor, RaggedTensor, or SparseTensor whose values should be
      counted. These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as `values`. For each value in
      `values`, the bin will be incremented by the corresponding weight instead
      of 1.
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `values` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.
    name: A name for this op.

  Returns:
    A SparseTensor with `output.shape = values.shape[:axis] + [N]`, where `N` is
      * `maxlength` (if set);
      * `minlength` (if set, and `minlength > reduce_max(values)`);
      * `0` (if `values` is empty);
      * `reduce_max(values) + 1` otherwise.

  Raises:
    ValueError: If both `weights` and `binary_output` are given, if `axis` is
      not one of `None`, `0` or `-1`, or if `weights` is not the same kind of
      tensor as `values`.


  Examples:

  **Bin-counting every item in individual batches**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Bin-counting with defined output shape**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i. However, all values of j
  equal to or above 'maxlength' are ignored, and the last dimension of the
  output's dense_shape is at least 'minlength'. Note that, while the input is
  identical to the example above, the value '10001' in batch item 2 is
  dropped, and the dense shape is [2, 500] instead of [2,10002] or [2, 102].

  >>> minlength = maxlength = 500
  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(
  ...    data, axis=-1, minlength=minlength, maxlength=maxlength)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[  0  10]
   [  0  20]
   [  0  30]
   [  1  11]
   [  1 101]], shape=(5, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1], shape=(5,), dtype=int64),
   dense_shape=tf.Tensor([  2 500], shape=(2,), dtype=int64))

  **Binary bin-counting**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where (i,j) is 1 if the value j
  appears in batch i at least once and is 0 otherwise. Note that, even though
  some values (like 20 in batch 1 and 11 in batch 2) appear more than once,
  the 'values' tensor is all 1s.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, binary_output=True, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 1 1 1 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Weighted bin-counting**

  This example takes two inputs - a values tensor and a weights tensor. These
  tensors must be identically shaped, and have the same row splits or indices
  in the case of RaggedTensors or SparseTensors. When performing a weighted
  count, the op will output a SparseTensor where the value of (i, j) is the
  sum of the values in the weight tensor's batch i in the locations where
  the values tensor has the value j. In this case, the output dtype is the
  same as the dtype of the weights tensor.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> weights = [[2, 0.25, 15, 0.5], [2, 17, 3, 0.9]]
  >>> output = tf.sparse.bincount(data, weights=weights, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([2. 0.75 15. 5. 17. 0.9], shape=(6,), dtype=float32),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  """
  with ops.name_scope(name, "count", [values, weights]):
    # Normalise Python / NumPy inputs; SparseTensors are passed through as-is.
    if not isinstance(values, sparse_tensor.SparseTensor):
      values = ragged_tensor.convert_to_tensor_or_ragged_tensor(
          values, name="values")
    if (weights is not None and
        not isinstance(weights, sparse_tensor.SparseTensor)):
      weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
          weights, name="weights")

    if binary_output and weights is not None:
      raise ValueError("binary_output and weights are mutually exclusive.")

    axis = 0 if axis is None else axis
    if axis not in (0, -1):
      raise ValueError(
          "Unsupported axis value %s. Only 0 and -1 are currently supported." %
          axis)

    # The kernels take -1 to mean "no bound given".
    minlength_value = -1 if minlength is None else minlength
    maxlength_value = -1 if maxlength is None else maxlength

    if axis == 0:
      # Collapse to a flat dense tensor of values (and matching weights) so
      # that the dense kernel below counts over every element at once.
      input_is_sparse = isinstance(values, sparse_tensor.SparseTensor)
      input_is_ragged = isinstance(values, ragged_tensor.RaggedTensor)
      if input_is_sparse or input_is_ragged:
        if weights is not None:
          check_weights = (
              validate_sparse_weights
              if input_is_sparse else validate_ragged_weights)
          weights = check_weights(values, weights)
        values = values.values
      else:
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        values = array_ops.reshape(values, [-1])

    # Pick the kernel matching the (possibly flattened) input type. Each
    # validator returns the weight values, or an empty tensor if none given.
    if isinstance(values, sparse_tensor.SparseTensor):
      weights = validate_sparse_weights(values, weights)
      counted = gen_count_ops.sparse_count_sparse_output(
          values.indices,
          values.values,
          values.dense_shape,
          weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)
    elif isinstance(values, ragged_tensor.RaggedTensor):
      weights = validate_ragged_weights(values, weights)
      counted = gen_count_ops.ragged_count_sparse_output(
          values.row_splits,
          values.values,
          weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)
    else:
      weights = validate_dense_weights(values, weights)
      counted = gen_count_ops.dense_count_sparse_output(
          values,
          weights=weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)

    out_indices, out_values, out_dense_shape = counted
    return sparse_tensor.SparseTensor(out_indices, out_values, out_dense_shape)
452
453
def validate_dense_weights(values, weights, dtype=None):
  """Checks dense `weights`, or builds an empty placeholder when absent.

  Args:
    values: The dense values tensor; its dtype is used for the placeholder when
      `dtype` is not given.
    weights: The weights to validate, or None.
    dtype: Optional dtype for the empty placeholder.

  Returns:
    `weights` unchanged when it is a `tf.Tensor`; otherwise, when `weights` is
    None, an empty constant of `dtype` (or of `values.dtype`).

  Raises:
    ValueError: If `weights` is given but is not a `tf.Tensor`.
  """
  if weights is not None:
    if isinstance(weights, ops.Tensor):
      return weights
    raise ValueError(
        "`weights` must be a tf.Tensor if `values` is a tf.Tensor.")

  placeholder_dtype = dtype if dtype else values.dtype
  return array_ops.constant([], dtype=placeholder_dtype)
466
467
def validate_sparse_weights(values, weights, dtype=None):
  """Checks sparse `weights`, or builds an empty placeholder when absent.

  Args:
    values: The SparseTensor being counted.
    weights: The weights to validate, or None.
    dtype: Optional dtype for the empty placeholder.

  Returns:
    When `weights` is None, an empty constant of `dtype` (or of the dtype of
    `values.values`). Otherwise the dense `weights.values` tensor, wrapped in
    an identity op that depends on equality assertions for any of
    `dense_shape` / `indices` that is not the very same object on both inputs.

  Raises:
    ValueError: If `weights` is given but is not a SparseTensor.
  """
  if weights is None:
    placeholder_dtype = dtype if dtype else values.values.dtype
    return array_ops.constant([], dtype=placeholder_dtype)

  if not isinstance(weights, sparse_tensor.SparseTensor):
    raise ValueError(
        "`weights` must be a SparseTensor if `values` is a SparseTensor.")

  # (weights component, values component, failure message); a component shared
  # by identity needs no runtime comparison.
  comparisons = (
      (weights.dense_shape, values.dense_shape,
       "'weights' and 'values' must have the same dense shape."),
      (weights.indices, values.indices,
       "'weights' and 'values' must have the same indices."),
  )
  assertions = [
      check_ops.assert_equal(w_part, v_part, message=failure)
      for w_part, v_part, failure in comparisons
      if w_part is not v_part
  ]

  if not assertions:
    return weights.values
  with ops.control_dependencies(assertions):
    return array_ops.identity(weights.values)
500
501
def validate_ragged_weights(values, weights, dtype=None):
  """Checks ragged `weights`, or builds an empty placeholder when absent.

  Args:
    values: The RaggedTensor being counted.
    weights: The weights to validate, or None.
    dtype: Optional dtype for the empty placeholder.

  Returns:
    When `weights` is None, an empty constant of `dtype` (or of the dtype of
    `values.values`). Otherwise `weights.values`, wrapped in an identity op
    that depends on a row-splits equality assertion unless both inputs share
    the very same `row_splits` object.

  Raises:
    ValueError: If `weights` is given but is not a RaggedTensor.
  """
  if weights is None:
    placeholder_dtype = dtype if dtype else values.values.dtype
    return array_ops.constant([], dtype=placeholder_dtype)

  if not isinstance(weights, ragged_tensor.RaggedTensor):
    raise ValueError(
        "`weights` must be a RaggedTensor if `values` is a RaggedTensor.")

  # Row splits shared by identity need no runtime comparison.
  if weights.row_splits is values.row_splits:
    return weights.values

  splits_match = check_ops.assert_equal(
      weights.row_splits,
      values.row_splits,
      message="'weights' and 'values' must have the same row splits.")
  with ops.control_dependencies([splits_match]):
    return array_ops.identity(weights.values)
527