# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of Loss operations for use in neural networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import confusion_matrix
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import weights_broadcast_ops
from tensorflow.python.ops.losses import util
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.deprecation import deprecated_argument_lookup
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["losses.Reduction"])
class Reduction(object):
  """Types of loss reduction.

  Contains the following values:

  * `NONE`: Un-reduced weighted losses with the same shape as input.
  * `SUM`: Scalar sum of weighted losses.
  * `MEAN`: Scalar `SUM` divided by sum of weights. DEPRECATED.
  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in
    losses.
  * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
    weights. DEPRECATED.
  * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. DEPRECATED.
  """

  NONE = "none"
  SUM = "weighted_sum"
  SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size"
  MEAN = "weighted_mean"
  SUM_BY_NONZERO_WEIGHTS = "weighted_sum_by_nonzero_weights"
  SUM_OVER_NONZERO_WEIGHTS = SUM_BY_NONZERO_WEIGHTS

  @classmethod
  def all(cls):
    return (
        cls.NONE,
        cls.SUM,
        cls.MEAN,
        cls.SUM_OVER_BATCH_SIZE,
        cls.SUM_OVER_NONZERO_WEIGHTS,
        cls.SUM_BY_NONZERO_WEIGHTS)

  @classmethod
  def validate(cls, key):
    if key not in cls.all():
      raise ValueError("Invalid Reduction Key %s." % key)
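

# Worked example of the reduction types above (illustrative note added by the
# editor, not part of the original module). For losses = [1.0, 2.0, 3.0, 4.0]
# and weights = [1.0, 0.5, 0.0, 1.0], the weighted losses are
# [1.0, 1.0, 0.0, 4.0], and the reductions evaluate to:
#
#   SUM                     -> 6.0
#   MEAN                    -> 6.0 / (1.0 + 0.5 + 0.0 + 1.0) = 2.4
#   SUM_OVER_BATCH_SIZE     -> 6.0 / 4 = 1.5
#   SUM_BY_NONZERO_WEIGHTS  -> 6.0 / 3 = 2.0
#   NONE                    -> [1.0, 1.0, 0.0, 4.0]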


def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
    then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return math_ops.div_no_nan(total_loss, num_present, name="value")


def _num_present(losses, weights, per_batch=False):
  """Computes the number of elements in the loss function induced by `weights`.

  A given weights tensor induces different numbers of usable elements in the
  `losses` tensor. The `weights` tensor is broadcast across `losses` for all
  possible dimensions. For example, if `losses` is a tensor of dimension
  `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights` is,
  in effect, tiled to match the shape of `losses`. Following this effective
  tile, the total number of present elements is the number of non-zero weights.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    per_batch: Whether to return the number of elements per batch or as a sum
      total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
    `per_batch` is `True`, the value is returned as a tensor of size
    `[batch_size]`. Otherwise, a single scalar tensor is returned.
  """
  if ((isinstance(weights, float) and weights != 0.0) or
      (context.executing_eagerly() and weights._rank() == 0  # pylint: disable=protected-access
       and not math_ops.equal(weights, 0.0))):
    return _num_elements(losses)
  with ops.name_scope(None, "num_present", (losses, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    present = array_ops.where(
        math_ops.equal(weights, 0.0),
        array_ops.zeros_like(weights),
        array_ops.ones_like(weights))
    present = weights_broadcast_ops.broadcast_weights(present, losses)
    if per_batch:
      return math_ops.reduce_sum(
          present,
          axis=math_ops.range(1, array_ops.rank(present)),
          keepdims=True,
          name=scope)
    return math_ops.reduce_sum(present, name=scope)


def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with ops.name_scope(None, "num_elements", values=[losses]) as scope:
    return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)


@tf_export(v1=["losses.compute_weighted_loss"])
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas. Used only for estimator + v1 optimizer flow.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, losses),)):
      losses = ops.convert_to_tensor(losses)
      input_dtype = losses.dtype
      losses = math_ops.cast(losses, dtype=dtypes.float32)
      weights = math_ops.cast(weights, dtype=dtypes.float32)
      weighted_losses = math_ops.multiply(losses, weights)
      if reduction == Reduction.NONE:
        loss = weighted_losses
      else:
        loss = math_ops.reduce_sum(weighted_losses)
        if reduction == Reduction.MEAN:
          loss = _safe_mean(
              loss, math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
          loss = _safe_mean(loss, _num_present(losses, weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          loss = _safe_mean(loss, _num_elements(losses))

      # Convert the result back to the input type.
      loss = math_ops.cast(loss, input_dtype)
      util.add_loss(loss, loss_collection)
      return loss
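

# Illustrative usage sketch (editor's note, not part of the original module).
# Zero-valued weights both mask elements out of the sum and, under the default
# SUM_BY_NONZERO_WEIGHTS reduction, shrink the divisor:
#
#   losses = tf.constant([[1.0, 2.0], [3.0, 4.0]])
#   weights = tf.constant([[1.0, 0.0], [1.0, 1.0]])
#   loss = tf.compat.v1.losses.compute_weighted_loss(losses, weights)
#   # (1.0 + 3.0 + 4.0) / 3 non-zero weights = 2.666...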


@tf_export(v1=["losses.absolute_difference"])
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.cosine_distance"])
@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
def cosine_distance(
    labels, predictions, axis=None, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
    dim=None):
  """Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that `predictions` and `labels` are already
  unit-normalized.

  Args:
    labels: `Tensor` whose shape matches 'predictions'.
    predictions: An arbitrary matrix.
    axis: The dimension along which the cosine distance is computed.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.
    dim: The old (deprecated) name for `axis`.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape, or
      `axis`, `labels`, `predictions` or `weights` is `None`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    raise ValueError("You must specify 'axis'.")
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "cosine_distance_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    radial_diffs = math_ops.multiply(predictions, labels)
    losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keepdims=True)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
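

# Illustrative sketch (editor's note, not part of the original module).
# `cosine_distance` expects unit-normalized inputs and reduces along `axis`;
# identical unit vectors give a per-sample loss of 1 - 1 = 0:
#
#   labels = tf.math.l2_normalize([[1.0, 1.0]], axis=1)
#   predictions = tf.math.l2_normalize([[1.0, 1.0]], axis=1)
#   loss = tf.compat.v1.losses.cosine_distance(labels, predictions, axis=1)
#   # 1 - sum(labels * predictions, axis=1) == 0.0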


@tf_export(v1=["losses.hinge_loss"])
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape
      of logits. The values of the tensor are expected to be 0.0 or 1.0.
      Internally the {0,1} labels are converted to {-1,1} when calculating the
      hinge loss.
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a
      positive (resp. negative) binary prediction.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match or
      if `labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.cast(logits, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
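

# Worked example (editor's note, not part of the original module). With labels
# in {0, 1} mapped internally to {-1, +1}, the per-element hinge loss is
# relu(1 - label_pm1 * logit):
#
#   labels = tf.constant([0.0, 1.0, 1.0])
#   logits = tf.constant([-2.0, 0.5, 3.0])
#   loss = tf.compat.v1.losses.hinge_loss(
#       labels, logits, reduction=tf.compat.v1.losses.Reduction.NONE)
#   # relu([1 - (-1)(-2), 1 - (1)(0.5), 1 - (1)(3)]) = [0.0, 0.5, 0.0]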


@tf_export(v1=["losses.huber_loss"])
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a [Huber Loss](https://en.wikipedia.org/wiki/Huber_loss) term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the huber loss function changes from a
      quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already
    # a nonzero contribution to the gradient from the quadratic term.
    linear = math_ops.subtract(abs_error, quadratic)
    losses = math_ops.add(
        math_ops.multiply(
            ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
            math_ops.multiply(quadratic, quadratic)),
        math_ops.multiply(delta, linear))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
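

# Worked example (editor's note, not part of the original module). With the
# default delta = 1.0 and error x = labels - predictions:
#
#   |x| = 0.5  ->  0.5 * 0.5^2                      = 0.125  (quadratic branch)
#   |x| = 3.0  ->  0.5 * 1.0^2 + 1.0 * (3.0 - 1.0)  = 2.5    (linear branch)
#
#   labels = tf.constant([0.0, 0.0])
#   predictions = tf.constant([0.5, 3.0])
#   loss = tf.compat.v1.losses.huber_loss(
#       labels, predictions, reduction=tf.compat.v1.losses.Reduction.NONE)
#   # -> [0.125, 2.5]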


@tf_export(v1=["losses.log_loss"])
def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
             loss_collection=ops.GraphKeys.LOSSES,
             reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Log Loss term to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid. Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "log_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = -math_ops.multiply(
        labels,
        math_ops.log(predictions + epsilon)) - math_ops.multiply(
            (1 - labels), math_ops.log(1 - predictions + epsilon))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
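

# Illustrative sketch (editor's note, not part of the original module).
# `predictions` are probabilities in [0, 1]; the per-element loss is
# -labels * log(p + epsilon) - (1 - labels) * log(1 - p + epsilon):
#
#   labels = tf.constant([1.0, 0.0])
#   predictions = tf.constant([0.9, 0.1])
#   loss = tf.compat.v1.losses.log_loss(
#       labels, predictions, reduction=tf.compat.v1.losses.Reduction.NONE)
#   # -> approximately [-log(0.9), -log(0.9)] = [0.105, 0.105]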


# TODO(b/37208492): Add reduction arg.
@tf_export(v1=["losses.mean_pairwise_squared_error"])
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs
  of corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], three pairs
  of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of
  pairs is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape
      of `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid. Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, labels),)):
      predictions = math_ops.cast(predictions, dtype=dtypes.float32)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      axis = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), axis=axis, keepdims=True)
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      term1 = 2.0 * math_ops.div_no_nan(
          sum_squares_diff_per_batch,
          math_ops.maximum(num_present_per_batch - 1, 0),
          name="value")

      sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
      term2 = 2.0 * math_ops.div_no_nan(
          math_ops.square(sum_diff),
          math_ops.maximum(
              math_ops.multiply(num_present_per_batch,
                                num_present_per_batch - 1), 0),
          name="value")

      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss
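

# Worked example (editor's note, not part of the original module). For a
# single sample with labels = [a, b, c] = [1, 2, 4] and
# predictions = [x, y, z] = [1, 3, 4], the docstring formula gives:
#
#   ((a-b)-(x-y))^2 = (-1 - (-2))^2 = 1
#   ((a-c)-(x-z))^2 = (-3 - (-3))^2 = 0
#   ((b-c)-(y-z))^2 = (-2 - (-1))^2 = 1
#   loss = (1 + 0 + 1) / 3 = 2/3
#
#   labels = tf.constant([[1.0, 2.0, 4.0]])
#   predictions = tf.constant([[1.0, 3.0, 4.0]])
#   tf.compat.v1.losses.mean_pairwise_squared_error(labels, predictions)
#   # -> 2/3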


@tf_export(v1=["losses.mean_squared_error"])
def mean_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid. Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.squared_difference(predictions, labels)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


@tf_export(v1=["losses.sigmoid_cross_entropy"])
def sigmoid_cross_entropy(
    multi_class_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:

      new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
                              + 0.5 * label_smoothing

  Args:
    multi_class_labels: `[batch_size, num_classes]` target integer labels in
      `{0, 1}`.
    logits: Float `[batch_size, num_classes]` logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    label_smoothing: If greater than `0` then smooth the labels.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `logits`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of
      `multi_class_labels` or if the shape of `weights` is invalid, or if
      `weights` is None. Also if `multi_class_labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if multi_class_labels is None:
    raise ValueError("multi_class_labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      (logits, multi_class_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(multi_class_labels.get_shape())

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(labels=multi_class_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
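

# Illustrative sketch (editor's note, not part of the original module). With
# label_smoothing = 0.2, hard {0, 1} targets are pulled towards 1/2:
# 0 -> 0 * 0.8 + 0.5 * 0.2 = 0.1 and 1 -> 1 * 0.8 + 0.5 * 0.2 = 0.9.
#
#   labels = tf.constant([[0.0, 1.0, 1.0]])
#   logits = tf.constant([[-1.0, 2.0, 0.5]])
#   loss = tf.compat.v1.losses.sigmoid_cross_entropy(
#       labels, logits, label_smoothing=0.2)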


@tf_export(v1=["losses.softmax_cross_entropy"])
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:

      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Note that `onehot_labels` and `logits` must have the same shape,
  e.g. `[batch_size, num_classes]`. The shape of `weights` must be
  broadcastable to loss, whose shape is decided by the shape of `logits`.
  In case the shape of `logits` is `[batch_size, num_classes]`, loss is
  a `Tensor` of shape `[batch_size]`.

  Args:
    onehot_labels: One-hot-encoded labels.
    logits: Logits outputs of the network.
    weights: Optional `Tensor` that is broadcastable to loss.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None. Also if
      `onehot_labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if onehot_labels is None:
    raise ValueError("onehot_labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[-1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    onehot_labels = array_ops.stop_gradient(
        onehot_labels, name="labels_stop_gradient")
    losses = nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=logits, name="xentropy")

    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
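

# Illustrative sketch (editor's note, not part of the original module). With
# label_smoothing = 0.1 and num_classes = 4, a one-hot row [0, 0, 1, 0]
# becomes [0.025, 0.025, 0.925, 0.025]: off-classes get
# label_smoothing / num_classes and the true class gets
# 1 - label_smoothing + label_smoothing / num_classes.
#
#   onehot_labels = tf.constant([[0.0, 0.0, 1.0, 0.0]])
#   logits = tf.constant([[1.0, 2.0, 5.0, 0.5]])
#   loss = tf.compat.v1.losses.softmax_cross_entropy(
#       onehot_labels, logits, label_smoothing=0.1)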


# TODO(ptucker): Merge this with similar method in metrics_impl.
def _remove_squeezable_dimensions(
    labels, predictions, weights=None, expected_rank_diff=0):
  """Internal version of _remove_squeezable_dimensions which handles weights.

  Squeezes `predictions` and `labels` if their ranks differ from expected by
  exactly 1.
  Squeezes `weights` if its rank is 1 more than the new rank of `predictions`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    weights: Optional weight `Tensor`. It will be squeezed if it's not scalar,
      and its rank is 1 more than the new rank of `labels`.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.

  Returns:
    Tuple of `predictions`, `labels` and `weights`, possibly with the last
    dimension squeezed.
  """
  labels, predictions = confusion_matrix.remove_squeezable_dimensions(
      labels, predictions, expected_rank_diff=expected_rank_diff)

  if weights is not None:
    weights = ops.convert_to_tensor(weights)
    labels_rank = labels.get_shape().ndims
    weights_shape = weights.get_shape()
    weights_rank = weights_shape.ndims

    if (labels_rank is not None) and (weights_rank is not None):
      # Use static rank.
      rank_diff = weights_rank - labels_rank
      if rank_diff == 1:
        weights = array_ops.squeeze(weights, [-1])
      return labels, predictions, weights

    # Use dynamic rank.
    rank_diff = array_ops.rank(weights) - array_ops.rank(labels)
    if (weights_rank is None) or (
        weights_rank > 0 and weights_shape.dims[-1].is_compatible_with(1)):
      weights = control_flow_ops.cond(
          math_ops.equal(1, rank_diff),
          lambda: array_ops.squeeze(weights, [-1]),
          lambda: weights)

  return labels, predictions, weights


@tf_export(v1=["losses.sparse_softmax_cross_entropy"])
def sparse_softmax_cross_entropy(
    labels, logits, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
      `float64`.
    weights: Coefficients for the loss. This must be scalar or broadcastable to
      `labels` (i.e. same rank and each dimension is either 1 or the same).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if any of them are None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      (logits, labels, weights)) as scope:
    # As documented above in Args, labels contain class IDs and logits contains
    # 1 probability per class ID, so we expect rank(logits) - rank(labels) == 1;
    # therefore, expected_rank_diff=1.
    labels, logits, weights = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
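

# Illustrative sketch (editor's note, not part of the original module). Unlike
# `softmax_cross_entropy`, labels here are class IDs rather than one-hot rows,
# so `labels` has one dimension fewer than `logits` (expected_rank_diff=1):
#
#   labels = tf.constant([2, 0])                 # shape [2]
#   logits = tf.constant([[0.1, 0.2, 3.0],       # shape [2, 3]
#                         [2.0, 0.5, 0.3]])
#   loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(labels, logits)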