# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""bincount ops."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import gen_count_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export


@tf_export("math.bincount", v1=[])
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32,
             name=None,
             axis=None,
             binary_output=False):
  """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  ```
  Vector length = Maximum element in vector `values` is 5. Adding 1, which is 6
                  will be the vector length.

  Each bin value in the output indicates number of occurrences of the particular
  index. Here, index 1 in output has a value 2. This indicates value 1 occurs
  two times in `values`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  ```
  Bin will be incremented by the corresponding weight instead of 1.
  Here, index 1 in output has a value 6. This is the summation of weights
  corresponding to the value in `values`.

  **Bin-counting on a certain axis**

  This example takes a 2 dimensional input and returns a `Tensor` with
  bincounting on each sample.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
  array([[1, 1, 1, 1],
         [2, 1, 1, 0]], dtype=int32)>


  **Bin-counting with binary_output**

  This example gives binary output instead of counting the occurrence.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
  array([[1, 1, 1, 1],
         [1, 1, 1, 0]], dtype=int32)>

  Args:
    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be counted.
      These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
    `ValueError` if both `weights` and `binary_output` are set, or if `axis`
    is not one of `None`, `0` or `-1`.

  """
  name = "bincount" if name is None else name
  with ops.name_scope(name):
    # The fast path below densifies `arr` with `ops.convert_to_tensor`, which
    # is not applicable to sparse/ragged inputs. Route those to the general
    # path so that `axis=None` behaves like `axis=0` for them as documented.
    is_composite = isinstance(
        arr, (sparse_tensor.SparseTensor, ragged_tensor.RaggedTensor))

    # Somehow forward compatible needs to be False.
    if not binary_output and axis is None and not is_composite:
      arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
      # Multiplying by the (0/1) non-empty flag yields size 0 for empty input.
      output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * (
          math_ops.reduce_max(arr) + 1)
      if minlength is not None:
        minlength = ops.convert_to_tensor(
            minlength, name="minlength", dtype=dtypes.int32)
        output_size = gen_math_ops.maximum(minlength, output_size)
      if maxlength is not None:
        maxlength = ops.convert_to_tensor(
            maxlength, name="maxlength", dtype=dtypes.int32)
        output_size = gen_math_ops.minimum(maxlength, output_size)
      if weights is not None:
        weights = ops.convert_to_tensor(weights, name="weights")
        return gen_math_ops.unsorted_segment_sum(weights, arr, output_size)
      # An empty weights tensor of the requested dtype selects plain counting.
      weights = constant_op.constant([], dtype)
      return gen_math_ops.bincount(arr, output_size, weights)

    if not isinstance(arr, sparse_tensor.SparseTensor):
      arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr, name="arr")
    if weights is not None:
      if not isinstance(weights, sparse_tensor.SparseTensor):
        weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("binary_output and weights are mutually exclusive.")

    if not arr.dtype.is_integer:
      arr = math_ops.cast(arr, dtypes.int32)
    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError("Unsupported axis value %s. Only 0 and -1 are currently "
                       "supported." % axis)

    if isinstance(arr, ragged_tensor.RaggedTensor):
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr.values)) > 0
    else:
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
    if isinstance(arr, sparse_tensor.SparseTensor):
      output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
          math_ops.reduce_max(arr.values) + 1)
    else:
      output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
          math_ops.reduce_max(arr) + 1)
    if minlength is not None:
      minlength = ops.convert_to_tensor(
          minlength, name="minlength", dtype=arr.dtype)
      output_size = gen_math_ops.maximum(minlength, output_size)
    if maxlength is not None:
      maxlength = ops.convert_to_tensor(
          maxlength, name="maxlength", dtype=arr.dtype)
      output_size = gen_math_ops.minimum(maxlength, output_size)

    if axis == 0:
      # Flatten everything to a 1-D dense tensor; weights are validated against
      # the composite structure first (while it is still available) and then
      # reduced to their flat values.
      if isinstance(arr, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(arr, weights, dtype)
        arr = arr.values
      elif isinstance(arr, ragged_tensor.RaggedTensor):
        if weights is not None:
          weights = validate_ragged_weights(arr, weights, dtype)
        arr = arr.values
      else:
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        arr = array_ops.reshape(arr, [-1])

    if isinstance(arr, sparse_tensor.SparseTensor):
      weights = validate_sparse_weights(arr, weights, dtype)
      return gen_math_ops.sparse_bincount(
          indices=arr.indices,
          values=arr.values,
          dense_shape=arr.dense_shape,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
    elif isinstance(arr, ragged_tensor.RaggedTensor):
      weights = validate_ragged_weights(arr, weights, dtype)
      return gen_math_ops.ragged_bincount(
          splits=arr.row_splits,
          values=arr.values,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
    else:
      weights = validate_dense_weights(arr, weights, dtype)
      return gen_math_ops.dense_bincount(
          input=arr,
          size=output_size,
          weights=weights,
          binary_output=binary_output)


@tf_export(v1=["math.bincount", "bincount"])
@deprecation.deprecated_endpoints("bincount")
def bincount_v1(arr,
                weights=None,
                minlength=None,
                maxlength=None,
                dtype=dtypes.int32):
  """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  Args:
    arr: An int32 tensor of non-negative values.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.
  """
  # Thin wrapper: delegates to `bincount` with its default name, axis and
  # binary_output arguments.
  return bincount(arr, weights, minlength, maxlength, dtype)


@tf_export("sparse.bincount")
def sparse_bincount(values,
                    weights=None,
                    axis=0,
                    minlength=None,
                    maxlength=None,
                    binary_output=False,
                    name=None):
  """Count the number of times an integer value appears in a tensor.

  This op takes an N-dimensional `Tensor`, `RaggedTensor`, or `SparseTensor`,
  and returns an N-dimensional int64 SparseTensor where element
  `[i0...i[axis], j]` contains the number of times the value `j` appears in
  slice `[i0...i[axis], :]` of the input tensor.  Currently, only `axis=0` and
  `axis=-1` are supported.

  Args:
    values: A Tensor, RaggedTensor, or SparseTensor whose values should be
      counted. These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as `values`. For each value in
      `values`, the bin will be incremented by the corresponding weight instead
      of 1.
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`). Defaults to `0`.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `values` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.
    name: A name for this op.

  Returns:
    A SparseTensor with `output.shape = values.shape[:axis] + [N]`, where `N` is
      * `maxlength` (if set);
      * `minlength` (if set, and `minlength > reduce_max(values)`);
      * `0` (if `values` is empty);
      * `reduce_max(values) + 1` otherwise.

  Raises:
    `ValueError` if both `weights` and `binary_output` are set, or if `axis`
    is not one of `None`, `0` or `-1`.


  Examples:

  **Bin-counting every item in individual batches**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Bin-counting with defined output shape**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i. However, all values of j
  above 'maxlength' are ignored. The dense_shape of the output sparse tensor
  is set to 'minlength'. Note that, while the input is identical to the
  example above, the value '10001' in batch item 2 is dropped, and the
  dense shape is [2, 500] instead of [2,10002] or [2, 102].

  >>> minlength = maxlength = 500
  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(
  ...    data, axis=-1, minlength=minlength, maxlength=maxlength)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[  0  10]
   [  0  20]
   [  0  30]
   [  1  11]
   [  1 101]], shape=(5, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1], shape=(5,), dtype=int64),
   dense_shape=tf.Tensor([  2 500], shape=(2,), dtype=int64))

  **Binary bin-counting**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where (i,j) is 1 if the value j
  appears in batch i at least once and is 0 otherwise. Note that, even though
  some values (like 20 in batch 1 and 11 in batch 2) appear more than once,
  the 'values' tensor is all 1s.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, binary_output=True, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 1 1 1 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Weighted bin-counting**

  This example takes two inputs - a values tensor and a weights tensor. These
  tensors must be identically shaped, and have the same row splits or indices
  in the case of RaggedTensors or SparseTensors. When performing a weighted
  count, the op will output a SparseTensor where the value of (i, j) is the
  sum of the values in the weight tensor's batch i in the locations where
  the values tensor has the value j. In this case, the output dtype is the
  same as the dtype of the weights tensor.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> weights = [[2, 0.25, 15, 0.5], [2, 17, 3, 0.9]]
  >>> output = tf.sparse.bincount(data, weights=weights, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([ 2.    0.75 15.    5.   17.    0.9 ], shape=(6,), dtype=float32),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  """
  with ops.name_scope(name, "count", [values, weights]):
    if not isinstance(values, sparse_tensor.SparseTensor):
      values = ragged_tensor.convert_to_tensor_or_ragged_tensor(
          values, name="values")
    if weights is not None:
      if not isinstance(weights, sparse_tensor.SparseTensor):
        weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("binary_output and weights are mutually exclusive.")

    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError("Unsupported axis value %s. Only 0 and -1 are currently "
                       "supported." % axis)

    # The count ops take plain attribute values; -1 is passed when the caller
    # did not supply a bound.
    minlength_value = minlength if minlength is not None else -1
    maxlength_value = maxlength if maxlength is not None else -1

    if axis == 0:
      # Flatten to a 1-D dense tensor; composite weights are validated against
      # the composite structure before it is discarded.
      if isinstance(values, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(values, weights)
        values = values.values
      elif isinstance(values, ragged_tensor.RaggedTensor):
        if weights is not None:
          weights = validate_ragged_weights(values, weights)
        values = values.values
      else:
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        values = array_ops.reshape(values, [-1])

    if isinstance(values, sparse_tensor.SparseTensor):
      weights = validate_sparse_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.sparse_count_sparse_output(
          values.indices,
          values.values,
          values.dense_shape,
          weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)
    elif isinstance(values, ragged_tensor.RaggedTensor):
      weights = validate_ragged_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.ragged_count_sparse_output(
          values.row_splits,
          values.values,
          weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)
    else:
      weights = validate_dense_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.dense_count_sparse_output(
          values,
          weights=weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)

    return sparse_tensor.SparseTensor(c_ind, c_val, c_shape)


def validate_dense_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The dense tensor being counted; its dtype is used for the empty
      weights tensor when neither `weights` nor `dtype` is given.
    weights: The weights to validate, or None.
    dtype: Optional dtype for the empty weights tensor created when `weights`
      is None.

  Returns:
    `weights` unchanged if it is a `tf.Tensor`, otherwise (when `weights` is
    None) an empty constant of dtype `dtype` or `values.dtype`.

  Raises:
    ValueError: If `weights` is given but is not a `tf.Tensor`.
  """
  if weights is None:
    # Compare against None explicitly: some dtype objects (e.g. plain
    # `numpy.dtype` instances) can be falsy, which would silently discard the
    # requested dtype.
    if dtype is not None:
      return array_ops.constant([], dtype=dtype)
    return array_ops.constant([], dtype=values.dtype)

  if not isinstance(weights, ops.Tensor):
    raise ValueError(
        "`weights` must be a tf.Tensor if `values` is a tf.Tensor.")

  return weights


def validate_sparse_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The `SparseTensor` being counted.
    weights: The weights to validate, or None.
    dtype: Optional dtype for the empty weights tensor created when `weights`
      is None.

  Returns:
    The flat `weights.values` tensor (gated on assertions that `weights` and
    `values` share dense shape and indices when those are distinct tensors),
    or, when `weights` is None, an empty constant of dtype `dtype` or
    `values.values.dtype`.

  Raises:
    ValueError: If `weights` is given but is not a `SparseTensor`.
  """
  if weights is None:
    # Explicit None check so that a falsy-but-valid dtype object is honoured.
    if dtype is not None:
      return array_ops.constant([], dtype=dtype)
    return array_ops.constant([], dtype=values.values.dtype)

  if not isinstance(weights, sparse_tensor.SparseTensor):
    raise ValueError(
        "`weights` must be a SparseTensor if `values` is a SparseTensor.")

  # Identity comparison: an assertion is only added when the two sparse
  # tensors do not literally share the same component tensor object.
  checks = []
  if weights.dense_shape is not values.dense_shape:
    checks.append(
        check_ops.assert_equal(
            weights.dense_shape,
            values.dense_shape,
            message="'weights' and 'values' must have the same dense shape."))
  if weights.indices is not values.indices:
    checks.append(
        check_ops.assert_equal(
            weights.indices,
            values.indices,
            message="'weights' and 'values' must have the same indices.")
    )
  if checks:
    # The identity op ties the returned tensor to the assertions via control
    # dependencies.
    with ops.control_dependencies(checks):
      weights = array_ops.identity(weights.values)
  else:
    weights = weights.values

  return weights


def validate_ragged_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The `RaggedTensor` being counted.
    weights: The weights to validate, or None.
    dtype: Optional dtype for the empty weights tensor created when `weights`
      is None.

  Returns:
    The flat `weights.values` tensor (gated on an assertion that `weights` and
    `values` share row splits when those are distinct tensors), or, when
    `weights` is None, an empty constant of dtype `dtype` or
    `values.values.dtype`.

  Raises:
    ValueError: If `weights` is given but is not a `RaggedTensor`.
  """
  if weights is None:
    # Explicit None check so that a falsy-but-valid dtype object is honoured.
    if dtype is not None:
      return array_ops.constant([], dtype=dtype)
    return array_ops.constant([], dtype=values.values.dtype)

  if not isinstance(weights, ragged_tensor.RaggedTensor):
    raise ValueError(
        "`weights` must be a RaggedTensor if `values` is a RaggedTensor.")

  # Identity comparison: the assertion is skipped when both ragged tensors
  # share the same row_splits tensor object.
  checks = []
  if weights.row_splits is not values.row_splits:
    checks.append(
        check_ops.assert_equal(
            weights.row_splits,
            values.row_splits,
            message="'weights' and 'values' must have the same row splits."))
  if checks:
    with ops.control_dependencies(checks):
      weights = array_ops.identity(weights.values)
  else:
    weights = weights.values

  return weights