# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of Loss operations for use in neural networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import confusion_matrix
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import weights_broadcast_ops
from tensorflow.python.ops.losses import util
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.deprecation import deprecated_argument_lookup
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=["losses.Reduction"])
class Reduction(object):
  """Types of loss reduction.

  Contains the following values:

  * `NONE`: Un-reduced weighted losses with the same shape as input.
  * `SUM`: Scalar sum of weighted losses.
  * `MEAN`: Scalar `SUM` divided by sum of weights. DEPRECATED.
  * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses.
  * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero
     weights. DEPRECATED.
  * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. DEPRECATED.
  """

  NONE = "none"
  SUM = "weighted_sum"
  SUM_OVER_BATCH_SIZE = "weighted_sum_over_batch_size"
  MEAN = "weighted_mean"
  SUM_BY_NONZERO_WEIGHTS = "weighted_sum_by_nonzero_weights"
  SUM_OVER_NONZERO_WEIGHTS = SUM_BY_NONZERO_WEIGHTS

  @classmethod
  def all(cls):
    return (
        cls.NONE,
        cls.SUM,
        cls.MEAN,
        cls.SUM_OVER_BATCH_SIZE,
        cls.SUM_OVER_NONZERO_WEIGHTS,
        cls.SUM_BY_NONZERO_WEIGHTS)

  @classmethod
  def validate(cls, key):
    if key not in cls.all():
      raise ValueError("Invalid Reduction Key %s." % key)


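# Illustrative sketch (not part of the library): how the `Reduction` keys above
# differ for losses [2.0, 4.0, 6.0] with weights [1.0, 1.0, 0.0], as applied by
# `compute_weighted_loss` below:
#   NONE                    -> [2.0, 4.0, 0.0]  (element-wise weighted losses)
#   SUM                     -> 6.0              (2.0 + 4.0 + 0.0)
#   MEAN                    -> 3.0              (6.0 / sum of weights, 2.0)
#   SUM_OVER_BATCH_SIZE     -> 2.0              (6.0 / 3 elements)
#   SUM_BY_NONZERO_WEIGHTS  -> 3.0              (6.0 / 2 non-zero weights)
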
def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: `Tensor` whose elements contain individual loss measurements.
    num_present: The number of measurable elements in `losses`.

  Returns:
    A scalar representing the mean of `losses`. If `num_present` is zero,
      then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return math_ops.div_no_nan(total_loss, num_present, name="value")


def _num_present(losses, weights, per_batch=False):
  """Computes the number of elements in the loss function induced by `weights`.

  A given weights tensor induces different numbers of usable elements in the
  `losses` tensor. The `weights` tensor is broadcast across `losses` for all
  possible dimensions. For example, if `losses` is a tensor of dimension
  `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights` is,
  in effect, tiled to match the shape of `losses`. Following this effective
  tile, the total number of present elements is the number of non-zero weights.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    per_batch: Whether to return the number of elements per batch or as a sum
      total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
      `per_batch` is `True`, the value is returned as a tensor of size
      `[batch_size]`. Otherwise, a single scalar tensor is returned.
  """
  if ((isinstance(weights, float) and weights != 0.0) or
      (context.executing_eagerly() and weights._rank() == 0  # pylint: disable=protected-access
       and not math_ops.equal(weights, 0.0))):
    return _num_elements(losses)
  with ops.name_scope(None, "num_present", (losses, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    present = array_ops.where(
        math_ops.equal(weights, 0.0),
        array_ops.zeros_like(weights),
        array_ops.ones_like(weights))
    present = weights_broadcast_ops.broadcast_weights(present, losses)
    if per_batch:
      return math_ops.reduce_sum(
          present,
          axis=math_ops.range(1, array_ops.rank(present)),
          keepdims=True,
          name=scope)
    return math_ops.reduce_sum(present, name=scope)


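# Illustrative example (not part of the library): for `_num_present` with
# `losses` of shape [2, 3] and per-sample `weights` of shape [2, 1], each
# weight is broadcast across the three loss columns before counting:
#
#   losses = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
#   weights = tf.constant([[1.0], [0.0]])
#   _num_present(losses, weights)                  # -> 3.0
#   _num_present(losses, weights, per_batch=True)  # -> [[3.0], [0.0]]
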
def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with ops.name_scope(None, "num_elements", values=[losses]) as scope:
    return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)


@tf_export(v1=["losses.compute_weighted_loss"])
def compute_weighted_loss(
    losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `losses`, and must be broadcastable to `losses` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: the loss will be added to these collections.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.

  Raises:
    ValueError: If `weights` is `None` or the shape is not compatible with
      `losses`, or if the number of dimensions (rank) of either `losses` or
      `weights` is missing.

  Note:
    When calculating the gradient of a weighted loss, contributions from
    both `losses` and `weights` are considered. If your `weights` depend
    on some model parameters but you do not want this to affect the loss
    gradient, you need to apply `tf.stop_gradient` to `weights` before
    passing them to `compute_weighted_loss`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  Reduction.validate(reduction)
  with ops.name_scope(scope, "weighted_loss", (losses, weights)):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas. Used only for estimator + v1 optimizer flow.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, losses),)):
      losses = ops.convert_to_tensor(losses)
      input_dtype = losses.dtype
      losses = math_ops.cast(losses, dtype=dtypes.float32)
      weights = math_ops.cast(weights, dtype=dtypes.float32)
      weighted_losses = math_ops.multiply(losses, weights)
      if reduction == Reduction.NONE:
        loss = weighted_losses
      else:
        loss = math_ops.reduce_sum(weighted_losses)
        if reduction == Reduction.MEAN:
          loss = _safe_mean(
              loss, math_ops.reduce_sum(array_ops.ones_like(losses) * weights))
        elif (reduction == Reduction.SUM_BY_NONZERO_WEIGHTS or
              reduction == Reduction.SUM_OVER_NONZERO_WEIGHTS):
          loss = _safe_mean(loss, _num_present(losses, weights))
        elif reduction == Reduction.SUM_OVER_BATCH_SIZE:
          loss = _safe_mean(loss, _num_elements(losses))

      # Convert the result back to the input type.
      loss = math_ops.cast(loss, input_dtype)
      util.add_loss(loss, loss_collection)
      return loss


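# Usage sketch (illustrative, not part of the library): `compute_weighted_loss`
# with per-sample weights that mask out the second sample, assuming the TF 1.x
# endpoint `tf.compat.v1.losses`:
#
#   losses = tf.constant([[1.0, 2.0], [3.0, 4.0]])
#   weights = tf.constant([[1.0], [0.0]])
#   loss = tf.compat.v1.losses.compute_weighted_loss(
#       losses, weights,
#       reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
#   # (1.0 + 2.0) / 2 non-zero broadcast weights = 1.5
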
@tf_export(v1=["losses.absolute_difference"])
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


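# Usage sketch (illustrative): `absolute_difference` with the default reduction
# is the mean absolute error over all elements:
#
#   labels = tf.constant([1.0, 2.0, 3.0])
#   predictions = tf.constant([1.5, 2.0, 2.0])
#   loss = tf.compat.v1.losses.absolute_difference(labels, predictions)
#   # (0.5 + 0.0 + 1.0) / 3 = 0.5
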
@tf_export(v1=["losses.cosine_distance"])
@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
def cosine_distance(
    labels, predictions, axis=None, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
    dim=None):
  """Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that `predictions` and `labels` are already
  unit-normalized.

  Args:
    labels: `Tensor` whose shape matches 'predictions'.
    predictions: An arbitrary matrix.
    axis: The dimension along which the cosine distance is computed.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.
    dim: The old (deprecated) name for `axis`.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape, or
      `axis`, `labels`, `predictions` or `weights` is `None`.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
  if axis is None:
    raise ValueError("You must specify 'axis'.")
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "cosine_distance_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())

    radial_diffs = math_ops.multiply(predictions, labels)
    losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keepdims=True)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


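# Illustrative example (not part of the library): `cosine_distance` on rows
# that are already unit-normalized; identical rows give 0, orthogonal rows 1:
#
#   labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
#   predictions = tf.constant([[1.0, 0.0], [1.0, 0.0]])
#   loss = tf.compat.v1.losses.cosine_distance(labels, predictions, axis=1)
#   # per-row distances [0.0, 1.0], averaged over 2 elements -> 0.5
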
@tf_export(v1=["losses.hinge_loss"])
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
      the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a positive
      (resp. negative) binary prediction.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match or
      if `labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.cast(logits, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


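# Illustrative sketch (not part of the library): `hinge_loss` maps the {0, 1}
# labels to {-1, 1} internally before applying max(0, 1 - label * logit):
#
#   labels = tf.constant([0.0, 1.0])
#   logits = tf.constant([-2.0, 0.5])
#   loss = tf.compat.v1.losses.hinge_loss(labels, logits)
#   # max(0, 1 - (-1)*(-2)) = 0.0 and max(0, 1 - 1*0.5) = 0.5
#   # averaged over 2 elements -> 0.25
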
@tf_export(v1=["losses.huber_loss"])
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a [Huber Loss](https://en.wikipedia.org/wiki/Huber_loss) term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
    0.5 * x^2                  if |x| <= d
    0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the huber loss function changes from a
      quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or
      `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already a
    # nonzero contribution to the gradient from the quadratic term.
    linear = math_ops.subtract(abs_error, quadratic)
    losses = math_ops.add(
        math_ops.multiply(
            ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
            math_ops.multiply(quadratic, quadratic)),
        math_ops.multiply(delta, linear))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


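# Illustrative sketch (not part of the library): `huber_loss` with delta=1.0;
# a small error stays in the quadratic branch, a large error switches to the
# linear branch:
#
#   labels = tf.constant([0.0, 0.0])
#   predictions = tf.constant([0.5, 3.0])
#   loss = tf.compat.v1.losses.huber_loss(labels, predictions, delta=1.0)
#   # 0.5 * 0.5^2 = 0.125 and 0.5 * 1^2 + 1 * (3 - 1) = 2.5
#   # averaged over 2 elements -> 1.3125
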
@tf_export(v1=["losses.log_loss"])
def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
             loss_collection=ops.GraphKeys.LOSSES,
             reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Log Loss term to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    epsilon: A small increment to add to avoid taking a log of zero.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "log_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = -math_ops.multiply(
        labels,
        math_ops.log(predictions + epsilon)) - math_ops.multiply(
            (1 - labels), math_ops.log(1 - predictions + epsilon))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


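# Usage sketch (illustrative, not part of the library): `log_loss` expects
# probabilities (not logits) and reduces to binary cross-entropy, up to the
# `epsilon` stabilizer:
#
#   labels = tf.constant([1.0, 0.0])
#   predictions = tf.constant([0.8, 0.1])
#   loss = tf.compat.v1.losses.log_loss(labels, predictions)
#   # (-log(0.8) - log(0.9)) / 2 ~= 0.164
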
# TODO(b/37208492): Add reduction arg.
@tf_export(v1=["losses.mean_pairwise_squared_error"])
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], the three
  pairs of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape of
      `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.cast(weights, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, labels),)):
      predictions = math_ops.cast(predictions, dtype=dtypes.float32)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      axis = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), axis=axis, keepdims=True)
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      term1 = 2.0 * math_ops.div_no_nan(
          sum_squares_diff_per_batch,
          math_ops.maximum(num_present_per_batch - 1, 0),
          name="value")

      sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
      term2 = 2.0 * math_ops.div_no_nan(
          math_ops.square(sum_diff),
          math_ops.maximum(
              math_ops.multiply(num_present_per_batch,
                                num_present_per_batch - 1), 0),
          name="value")

      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss


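# Illustrative sketch (not part of the library): `mean_pairwise_squared_error`
# depends only on pairwise differences, so a constant offset added to every
# prediction does not change the loss:
#
#   labels = tf.constant([[1.0, 2.0, 4.0]])
#   predictions = tf.constant([[1.5, 2.5, 4.5]])  # labels shifted by 0.5
#   loss = tf.compat.v1.losses.mean_pairwise_squared_error(labels, predictions)
#   # all pairwise differences match, so the loss is 0.0
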
@tf_export(v1=["losses.mean_squared_error"])
def mean_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
    labels = math_ops.cast(labels, dtype=dtypes.float32)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.squared_difference(predictions, labels)
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


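# Usage sketch (illustrative): `mean_squared_error` with the default
# SUM_BY_NONZERO_WEIGHTS reduction and scalar weights:
#
#   labels = tf.constant([1.0, 2.0, 3.0])
#   predictions = tf.constant([1.0, 2.0, 5.0])
#   loss = tf.compat.v1.losses.mean_squared_error(labels, predictions)
#   # (0.0 + 0.0 + 4.0) / 3 ~= 1.333
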
@tf_export(v1=["losses.sigmoid_cross_entropy"])
def sigmoid_cross_entropy(
    multi_class_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:

      new_multi_class_labels = multi_class_labels * (1 - label_smoothing)
                               + 0.5 * label_smoothing

  Args:
    multi_class_labels: `[batch_size, num_classes]` target integer labels in
      `{0, 1}`.
    logits: Float `[batch_size, num_classes]` logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    label_smoothing: If greater than `0` then smooth the labels.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `logits`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of
      `multi_class_labels` or if the shape of `weights` is invalid, or if
      `weights` is None.  Also if `multi_class_labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if multi_class_labels is None:
    raise ValueError("multi_class_labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      (logits, multi_class_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(multi_class_labels.get_shape())

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(labels=multi_class_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


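# Illustrative sketch (not part of the library): `sigmoid_cross_entropy` with
# mild label smoothing, so the {0, 1} targets become 0.05 and 0.95:
#
#   multi_class_labels = tf.constant([[1.0, 0.0]])
#   logits = tf.constant([[2.0, -1.0]])
#   loss = tf.compat.v1.losses.sigmoid_cross_entropy(
#       multi_class_labels, logits, label_smoothing=0.1)
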
@tf_export(v1=["losses.softmax_cross_entropy"])
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Note that `onehot_labels` and `logits` must have the same shape,
  e.g. `[batch_size, num_classes]`. The shape of `weights` must be
  broadcastable to loss, whose shape is decided by the shape of `logits`.
  In case the shape of `logits` is `[batch_size, num_classes]`, loss is
  a `Tensor` of shape `[batch_size]`.

  Args:
    onehot_labels: One-hot-encoded labels.
    logits: Logits outputs of the network.
    weights: Optional `Tensor` that is broadcastable to loss.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.  Also if
      `onehot_labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if onehot_labels is None:
    raise ValueError("onehot_labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[-1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    onehot_labels = array_ops.stop_gradient(
        onehot_labels, name="labels_stop_gradient")
    losses = nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=logits, name="xentropy")

    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)


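# Illustrative sketch (not part of the library): `softmax_cross_entropy` on
# one-hot targets; with label_smoothing=0.1 and 3 classes, the smoothed target
# row becomes roughly [0.933, 0.033, 0.033]:
#
#   onehot_labels = tf.constant([[1.0, 0.0, 0.0]])
#   logits = tf.constant([[2.0, 0.5, -1.0]])
#   loss = tf.compat.v1.losses.softmax_cross_entropy(
#       onehot_labels, logits, label_smoothing=0.1)
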
# TODO(ptucker): Merge this with similar method in metrics_impl.
def _remove_squeezable_dimensions(
    labels, predictions, weights=None, expected_rank_diff=0):
  """Internal version of _remove_squeezable_dimensions which handles weights.

  Squeezes `predictions` and `labels` if their ranks differ from expected by
  exactly 1.
  Squeezes `weights` if its rank is 1 more than the new rank of `predictions`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    weights: Optional weight `Tensor`. It will be squeezed if it's not scalar,
      and its rank is 1 more than the new rank of `labels`.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.

  Returns:
    Tuple of `labels`, `predictions` and `weights`, possibly with the last
    dimension squeezed.
  """
  labels, predictions = confusion_matrix.remove_squeezable_dimensions(
      labels, predictions, expected_rank_diff=expected_rank_diff)

  if weights is not None:
    weights = ops.convert_to_tensor(weights)
    labels_rank = labels.get_shape().ndims
    weights_shape = weights.get_shape()
    weights_rank = weights_shape.ndims

    if (labels_rank is not None) and (weights_rank is not None):
      # Use static rank.
      rank_diff = weights_rank - labels_rank
      if rank_diff == 1:
        weights = array_ops.squeeze(weights, [-1])
      return labels, predictions, weights

    # Use dynamic rank.
    rank_diff = array_ops.rank(weights) - array_ops.rank(labels)
    if (weights_rank is None) or (
        weights_rank > 0 and weights_shape.dims[-1].is_compatible_with(1)):
      weights = control_flow_ops.cond(
          math_ops.equal(1, rank_diff),
          lambda: array_ops.squeeze(weights, [-1]),
          lambda: weights)

  return labels, predictions, weights


@tf_export(v1=["losses.sparse_softmax_cross_entropy"])
def sparse_softmax_cross_entropy(
    labels, logits, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  Args:
    labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of
      `labels` and result) and dtype `int32` or `int64`. Each entry in `labels`
      must be an index in `[0, num_classes)`. Other values will raise an
      exception when this op is run on CPU, and return `NaN` for corresponding
      loss and gradient rows on GPU.
    logits: Unscaled log probabilities of shape
      `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or
      `float64`.
    weights: Coefficients for the loss. This must be scalar or broadcastable to
      `labels` (i.e. same rank and each dimension is either 1 or the same).
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has the same shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits`, `labels`, and `weights` are
      incompatible, or if any of them are None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
                      (logits, labels, weights)) as scope:
    # As documented above in Args, labels contain class IDs and logits contains
    # 1 probability per class ID, so we expect rank(logits) - rank(labels) == 1;
    # therefore, expected_rank_diff=1.
    labels, logits, weights = _remove_squeezable_dimensions(
        labels, logits, weights, expected_rank_diff=1)
    losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                         logits=logits,
                                                         name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)

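# Illustrative sketch (not part of the library): `sparse_softmax_cross_entropy`
# takes integer class ids of rank r paired with logits of rank r + 1:
#
#   labels = tf.constant([1, 2])            # class ids, shape [2]
#   logits = tf.constant([[0.1, 2.0, 0.3],  # shape [2, 3]
#                         [1.0, 0.2, 3.0]])
#   loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(labels, logits)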