1# Copyright 2020-2021 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ============================================================================
15"""loss"""
16import mindspore
17import mindspore.common.dtype as mstype
18from mindspore import log
19from mindspore.common.tensor import Tensor
20from mindspore.common.parameter import Parameter
21from mindspore.ops import operations as P
22from mindspore.ops import functional as F
23from mindspore import nn
24from mindspore.ops.primitive import constexpr
25from mindspore.nn.cell import Cell
26from mindspore.nn.layer.activation import get_activation
27from mindspore._checkparam import Validator as validator
28from mindspore._checkparam import Rel
29from ... import context
30
31
32class LossBase(Cell):
33    """
34    Base class for other losses.
35
36    Losses derived from this class should implement their own `construct` method and use `self.get_loss`
37    to apply the reduction to the loss values.
38
39    Args:
40        reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
41            Default: "mean".
42
43    Raises:
44        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
45
46    Supported Platforms:
47        ``Ascend`` ``GPU`` ``CPU``
48    """
49    def __init__(self, reduction='mean'):
50        """Initialize Loss."""
51        super(LossBase, self).__init__()
52
53        if reduction not in ('mean', 'sum', 'none'):
54            raise ValueError(f"For '{self.cls_name}', the 'reduction' should be in ['mean', 'sum', 'none'], "
55                             f"but got {reduction}.")
56
57        self.average = True
58        self.reduce = True
59        if reduction == 'sum':
60            self.average = False
61        if reduction == 'none':
62            self.reduce = False
63
64        self.reduce_mean = P.ReduceMean()
65        self.reduce_sum = P.ReduceSum()
66        self.mul = P.Mul()
67        self.cast = P.Cast()
68
69    def get_axis(self, x):
70        """
71        Get the tuple of all axes of the input, used as the reduction axes.
72
73        Args:
74            x (Tensor): Tensor of any shape.
75        """
76        shape = F.shape(x)
77        length = F.tuple_len(shape)
78        perm = F.make_range(0, length)
79        return perm
80
81    def get_loss(self, x, weights=1.0):
82        """
83        Computes the weighted loss.
84
85        Args:
86            x (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means any number of
87                additional dimensions.
88            weights (Union[float, Tensor]): Optional `Tensor` whose rank is either 0, or the same rank as inputs,
89                and must be broadcastable to inputs (i.e., all dimensions must be either `1`,
90                or the same as the corresponding inputs dimension).
91        """
92        input_dtype = x.dtype
93        x = self.cast(x, mstype.float32)
94        weights = self.cast(weights, mstype.float32)
95        x = self.mul(weights, x)
96        if self.reduce and self.average:
97            x = self.reduce_mean(x, self.get_axis(x))
98        if self.reduce and not self.average:
99            x = self.reduce_sum(x, self.get_axis(x))
100        x = self.cast(x, input_dtype)
101        return x
102
103    def construct(self, logits, labels):
104        raise NotImplementedError
105
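# A minimal sketch of the subclassing pattern described in LossBase's docstring,
# assuming only operators already imported in this module. The class name
# `_ExampleSquaredErrorLoss` is illustrative and not part of the public API: it
# implements `construct` and lets `self.get_loss` apply the configured reduction.
class _ExampleSquaredErrorLoss(LossBase):
    """Illustrative only: element-wise squared error reduced according to `reduction`."""
    def __init__(self, reduction='mean'):
        super(_ExampleSquaredErrorLoss, self).__init__(reduction)
        self.square = P.Square()

    def construct(self, logits, labels):
        # Compute element-wise loss values, then apply the 'mean'/'sum'/'none' reduction.
        x = self.square(logits - labels)
        return self.get_loss(x)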
106
107class _Loss(LossBase):
108    """
109    Base class for other losses.
110    """
111    def __init__(self, reduction='mean'):
112        """Initialize _Loss."""
113        log.warning("'_Loss' is deprecated from version 1.3 and "
114                    "will be removed in a future version, use 'LossBase' instead.")
115        super(_Loss, self).__init__(reduction)
116
117    def construct(self, logits, labels):
118        raise NotImplementedError
119
120
121@constexpr
122def _check_is_tensor(param_name, input_data, cls_name):
123    """Internal function, used to check whether the input data is Tensor."""
124    if input_data is not None and not isinstance(F.typeof(input_data), mstype.tensor_type):
125        raise TypeError(f"For '{cls_name}', the '{param_name}' should be '{mstype.tensor_type}', "
126                        f"but got '{F.typeof(input_data)}'")
127
128
129class L1Loss(LossBase):
130    r"""
131    L1Loss creates a criterion to measure the mean absolute error (MAE) between :math:`x` and :math:`y` element-wise,
132    where :math:`x` is the input Tensor and :math:`y` is the labels Tensor.
133
134    For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
135    the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as:
136
137    .. math::
138        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with } l_n = \left| x_n - y_n \right|,
139
140    where :math:`N` is the batch size. If `reduction` is not 'none', then:
141
142    .. math::
143        \ell(x, y) =
144        \begin{cases}
145            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
146            \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
147        \end{cases}
148
149    Args:
150        reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
151            Default: "mean".
152
153    Inputs:
154        - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means any number of
155          additional dimensions.
156        - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, the same shape as `logits` in common cases.
157          However, the shape of `logits` may differ from the shape of `labels` as long as they can be
158          broadcast against each other.
159
160    Outputs:
161        Tensor, loss float tensor. The output is a scalar (zero-dimensional) if `reduction` is 'mean' or 'sum',
162        while its shape is the broadcast shape if `reduction` is 'none'.
163
164    Raises:
165        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
166
167    Supported Platforms:
168        ``Ascend`` ``GPU`` ``CPU``
169
170    Examples:
171        >>> # Case 1: logits.shape = labels.shape = (3,)
172        >>> loss = nn.L1Loss()
173        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
174        >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
175        >>> output = loss(logits, labels)
176        >>> print(output)
177        0.33333334
178        >>> # Case 2: logits.shape = (3,), labels.shape = (2, 3)
179        >>> loss = nn.L1Loss(reduction='none')
180        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
181        >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32)
182        >>> output = loss(logits, labels)
183        >>> print(output)
184        [[0. 1. 2.]
185         [0. 0. 1.]]
186    """
187    def __init__(self, reduction='mean'):
188        """Initialize L1Loss."""
189        super(L1Loss, self).__init__(reduction)
190        self.abs = P.Abs()
191
192    def construct(self, logits, labels):
193        _check_is_tensor('logits', logits, self.cls_name)
194        _check_is_tensor('labels', labels, self.cls_name)
195        x = self.abs(logits - labels)
196        return self.get_loss(x)
197
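# A NumPy cross-check of the L1Loss formula above, matching its doctests: for logits
# [1, 2, 3] and labels [1, 2, 2] the unreduced loss is [0, 0, 1] and the 'mean'
# reduction gives 1/3 (printed as 0.33333334). `_l1_loss_reference` is an
# illustrative helper, not part of this module's public API.
def _l1_loss_reference(logits, labels, reduction='mean'):
    """Reference L1 loss computed directly from the formula in the docstring."""
    import numpy as np
    diff = np.abs(np.asarray(logits, np.float32) - np.asarray(labels, np.float32))
    if reduction == 'mean':
        return diff.mean()
    if reduction == 'sum':
        return diff.sum()
    return diff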
198
199class MSELoss(LossBase):
200    r"""
201    MSELoss creates a criterion to measure the mean squared error (squared L2-norm) between :math:`x` and :math:`y`
202    element-wise, where :math:`x` is the input and :math:`y` is the labels.
203
204    For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
205    the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as:
206
207    .. math::
208        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with} \quad l_n = (x_n - y_n)^2.
209
210    where :math:`N` is the batch size. If `reduction` is not 'none', then:
211
212    .. math::
213        \ell(x, y) =
214        \begin{cases}
215            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
216            \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
217        \end{cases}
218
219    Args:
220        reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
221            Default: "mean".
222
223    Inputs:
224        - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means any number of
225          additional dimensions.
226        - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, the same shape as `logits` in common cases.
227          However, the shape of `logits` may differ from the shape of `labels` as long as they can be
228          broadcast against each other.
229
230    Outputs:
231        Tensor, loss float tensor. The output is a scalar (zero-dimensional) if `reduction` is 'mean' or 'sum',
232        while its shape is the broadcast shape if `reduction` is 'none'.
233
234    Raises:
235        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
236
237    Supported Platforms:
238        ``Ascend`` ``GPU`` ``CPU``
239
240    Examples:
241        >>> # Case 1: logits.shape = labels.shape = (3,)
242        >>> loss = nn.MSELoss()
243        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
244        >>> labels = Tensor(np.array([1, 1, 1]), mindspore.float32)
245        >>> output = loss(logits, labels)
246        >>> print(output)
247        1.6666667
248        >>> # Case 2: logits.shape = (3,), labels.shape = (2, 3)
249        >>> loss = nn.MSELoss(reduction='none')
250        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
251        >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32)
252        >>> output = loss(logits, labels)
253        >>> print(output)
254        [[0. 1. 4.]
255         [0. 0. 1.]]
256    """
257    def construct(self, logits, labels):
258        _check_is_tensor('logits', logits, self.cls_name)
259        _check_is_tensor('labels', labels, self.cls_name)
260        x = F.square(logits - labels)
261        return self.get_loss(x)
262
263
264class RMSELoss(LossBase):
265    r"""
266    RMSELoss creates a criterion to measure the root mean square error between :math:`x` and :math:`y`
267    element-wise, where :math:`x` is the input and :math:`y` is the labels.
268
269    For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
270    the loss of :math:`x` and :math:`y` is given as:
271
272    .. math::
273        loss = \sqrt{\frac{1}{N}\sum_{i=1}^{N}{(x_i-y_i)^2}}
274
275    Inputs:
276        - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means any number of
277          additional dimensions.
278        - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, the same shape as `logits` in common cases.
279          However, the shape of `logits` may differ from the shape of `labels` as long as they can be
280          broadcast against each other.
281
282    Outputs:
283        Tensor, weighted loss float tensor; the output is a scalar (zero-dimensional).
284
285    Supported Platforms:
286        ``Ascend`` ``GPU`` ``CPU``
287
288    Examples:
289        >>> # Case 1: logits.shape = labels.shape = (3,)
290        >>> loss = nn.RMSELoss()
291        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
292        >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
293        >>> output = loss(logits, labels)
294        >>> print(output)
295        0.57735026
296        >>> # Case 2: logits.shape = (3,), labels.shape = (2, 3)
297        >>> loss = nn.RMSELoss()
298        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
299        >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32)
300        >>> output = loss(logits, labels)
301        >>> print(output)
302        1.0
303    """
304    def __init__(self):
305        """Initialize RMSELoss."""
306        super(RMSELoss, self).__init__()
307        self.MSELoss = MSELoss()
308
309    def construct(self, logits, label):
310        rmse_loss = F.sqrt(self.MSELoss(logits, label))
311
312        return rmse_loss
313
314
315class MAELoss(LossBase):
316    r"""
317    MAELoss creates a criterion to measure the average absolute error between :math:`x` and :math:`y`
318    element-wise, where :math:`x` is the input and :math:`y` is the labels.
319
320    For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`,
321    the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as:
322
323    .. math::
324        \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with } l_n = \left| x_n - y_n \right|,
325
326    where :math:`N` is the batch size. If `reduction` is not 'none', then:
327
328    .. math::
329        \ell(x, y) =
330        \begin{cases}
331            \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
332            \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
333        \end{cases}
334
335    Args:
336        reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
337                         Default: "mean".
338
339    Inputs:
340        - **logits** (Tensor) - Tensor of shape :math:`(M, *)` where :math:`*` means any number of
341          additional dimensions.
342        - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, the same shape as `logits` in common cases.
343          However, the shape of `logits` may differ from the shape of `labels` as long as they can be
344          broadcast against each other.
345
346    Outputs:
347        Tensor, weighted loss float tensor. The output is a scalar (zero-dimensional) if `reduction` is 'mean' or 'sum',
348        while its shape is the broadcast shape if `reduction` is 'none'.
349
350    Raises:
351        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
352
353    Supported Platforms:
354        ``Ascend`` ``GPU`` ``CPU``
355
356    Examples:
357        >>> # Case 1: logits.shape = labels.shape = (3,)
358        >>> loss = nn.MAELoss()
359        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
360        >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
361        >>> output = loss(logits, labels)
362        >>> print(output)
363        0.33333334
364        >>> # Case 2: logits.shape = (3,), labels.shape = (2, 3)
365        >>> loss = nn.MAELoss(reduction='none')
366        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
367        >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32)
368        >>> output = loss(logits, labels)
369        >>> print(output)
370        [[0. 1. 2.]
371         [0. 0. 1.]]
372    """
373    def __init__(self, reduction='mean'):
374        """Initialize MAELoss."""
375        super(MAELoss, self).__init__(reduction)
376        self.abs = P.Abs()
377
378    def construct(self, logits, label):
379        _check_is_tensor('logits', logits, self.cls_name)
380        _check_is_tensor('labels', label, self.cls_name)
381        x = self.abs(logits - label)
382        return self.get_loss(x)
383
384
385class SmoothL1Loss(LossBase):
386    r"""
387    A loss class for learning region proposals.
388
389    SmoothL1Loss can be regarded as a modified version of L1Loss or a combination of L1Loss and L2Loss.
390    L1Loss computes the element-wise absolute difference between two input tensors while L2Loss computes the
391    squared difference between two input tensors. L2Loss often leads to faster convergence but it is less
392    robust to outliers.
393
394    Given two inputs :math:`x` and :math:`y` of length :math:`N`, the unreduced SmoothL1Loss can be described
395    as follows:
396
397    .. math::
398        L_{i} =
399        \begin{cases}
400        \frac{0.5 (x_i - y_i)^{2}}{\text{beta}}, & \text{if } |x_i - y_i| < \text{beta} \\
401        |x_i - y_i| - 0.5 \text{beta}, & \text{otherwise. }
402        \end{cases}
403
404    Here :math:`\text{beta}` controls the point where the loss function changes from quadratic to linear.
405    Its default value is 1.0. :math:`N` is the batch size. This function returns an
406    unreduced loss tensor.
407
408    Args:
409        beta (float): A parameter used to control the point where the function will change from
410            quadratic to linear. Default: 1.0.
411
412    Inputs:
413        - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means any number of
414          additional dimensions. Data type must be float16 or float32.
415        - **labels** (Tensor) - Ground truth data, tensor of shape :math:`(N, *)`,
416          same shape and dtype as the `logits`.
417
418    Outputs:
419        Tensor, loss float tensor, same shape and dtype as the `logits`.
420
421    Raises:
422        TypeError: If `beta` is not a float.
423        TypeError: If dtype of `logits` or `labels` is neither float16 nor float32.
424        ValueError: If `beta` is less than or equal to 0.
425        ValueError: If shape of `logits` is not the same as `labels`.
426
427    Supported Platforms:
428        ``Ascend`` ``GPU`` ``CPU``
429
430    Examples:
431        >>> loss = nn.SmoothL1Loss()
432        >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
433        >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
434        >>> output = loss(logits, labels)
435        >>> print(output)
436        [0.  0.  0.5]
437    """
438    def __init__(self, beta=1.0):
439        """Initialize SmoothL1Loss."""
440        super(SmoothL1Loss, self).__init__()
441        self.beta = beta
442        self.smooth_l1_loss = P.SmoothL1Loss(self.beta)
443
444    def construct(self, logits, labels):
445        _check_is_tensor('logits', logits, self.cls_name)
446        _check_is_tensor('labels', labels, self.cls_name)
447        return self.smooth_l1_loss(logits, labels)
448
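# A NumPy cross-check of the piecewise SmoothL1 formula above, assuming beta > 0.
# `_smooth_l1_reference` is an illustrative helper, not part of this module: for the
# doctest inputs ([1, 2, 3] vs. [1, 2, 2]) it returns [0., 0., 0.5], matching the
# printed output of nn.SmoothL1Loss.
def _smooth_l1_reference(logits, labels, beta=1.0):
    """Unreduced SmoothL1 loss computed directly from the formula."""
    import numpy as np
    diff = np.abs(np.asarray(logits, np.float32) - np.asarray(labels, np.float32))
    return np.where(diff < beta, 0.5 * diff * diff / beta, diff - 0.5 * beta)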
449
450class SoftMarginLoss(LossBase):
451    r"""
452    A loss class for two-class classification problems.
453
454    SoftMarginLoss creates a criterion that optimizes a two-class classification
455    logistic loss between input tensor :math:`x` and labels tensor :math:`y`
456    (containing 1 or -1).
457
458    .. math::
459        \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
460
461    Args:
462        reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean".
463
464    Inputs:
465        - **logits** (Tensor) - Predict data. Data type must be float16 or float32.
466        - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`.
467
468    Outputs:
469        Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`.
470        Otherwise, a scalar value will be returned.
471
472    Raises:
473        TypeError: If `logits` or `labels` is not a Tensor.
474        TypeError: If dtype of `logits` or `labels` is neither float16 nor float32.
475        ValueError: If shape of `logits` is not the same as `labels`.
476        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
477
478    Supported Platforms:
479        ``Ascend``
480
481    Examples:
482        >>> loss = nn.SoftMarginLoss()
483        >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
484        >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
485        >>> output = loss(logits, labels)
486        >>> print(output)
487        0.6764238
488    """
489    def __init__(self, reduction='mean'):
490        super(SoftMarginLoss, self).__init__()
491        self.soft_margin_loss = P.SoftMarginLoss(reduction)
492
493    def construct(self, logits, labels):
494        return self.soft_margin_loss(logits, labels)
495
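# A NumPy sketch of the SoftMarginLoss formula above with 'mean' reduction, assuming
# labels take values in {-1, 1}. `_soft_margin_reference` is an illustrative helper,
# not part of the public API; for the doctest inputs it reproduces the printed value
# 0.6764238 up to float precision.
def _soft_margin_reference(logits, labels):
    """Mean of log(1 + exp(-y * x)) over all elements."""
    import numpy as np
    x = np.asarray(logits, np.float32)
    y = np.asarray(labels, np.float32)
    return np.mean(np.log1p(np.exp(-y * x)))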
496
497class SoftmaxCrossEntropyWithLogits(LossBase):
498    r"""
499    Computes softmax cross entropy between logits and labels.
500
501    Measures the distribution error between the probabilities of the input (computed with softmax function) and the
502    labels where the classes are mutually exclusive (only one class is positive) using cross entropy loss.
503
504    Typical input into this function is unnormalized scores denoted as x whose shape is (N, C),
505    and the corresponding targets.
506
507    For each instance :math:`x_i`, i ranges from 0 to N-1, the loss is given as:
508
509    .. math::
510        \ell(x_i, c) = - \log\left(\frac{\exp(x_i[c])}{\sum_j \exp(x_i[j])}\right)
511        =  -x_i[c] + \log\left(\sum_j \exp(x_i[j])\right)
512
513    where :math:`x_i` is a 1D score Tensor and :math:`c` is the index of the 1 in the one-hot label.
514
515    Note:
516        While the label classes are mutually exclusive, i.e., only one class is positive in the labels, the predicted
517        probabilities do not need to be exclusive. It is only required that the predicted probability distribution
518        of each entry is a valid one.
519
520    Args:
521        sparse (bool): Specifies whether labels use sparse format or not. Default: False.
522        reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
523            If "none", do not perform reduction. Default: "none".
524
525    Inputs:
526        - **logits** (Tensor) - Tensor of shape (N, C). Data type must be float16 or float32.
527        - **labels** (Tensor) - Tensor of shape (N, ). If `sparse` is True, The type of
528          `labels` is int32 or int64. Otherwise, the type of `labels` is the same as the type of `logits`.
529
530    Outputs:
531        Tensor, a tensor of shape :math:`(N,)` with the per-sample cross-entropy losses if `reduction` is "none", otherwise a scalar.
532
533    Raises:
534        TypeError: If `sparse` is not a bool.
535        TypeError: If `sparse` is True and dtype of `labels` is neither int32 nor int64.
536        TypeError: If `sparse` is False and dtype of `labels` is neither float16 nor float32.
537        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
538
539    Supported Platforms:
540        ``Ascend`` ``GPU`` ``CPU``
541
542    Examples:
543        >>> # case 1: sparse=True
544        >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
545        >>> logits = Tensor(np.array([[3, 5, 6, 9, 12, 33, 42, 12, 32, 72]]), mindspore.float32)
546        >>> labels_np = np.array([1]).astype(np.int32)
547        >>> labels = Tensor(labels_np)
548        >>> output = loss(logits, labels)
549        >>> print(output)
550        [67.]
551        >>> # case 2: sparse=False
552        >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
553        >>> logits = Tensor(np.array([[3, 5, 6, 9, 12, 33, 42, 12, 32, 72]]), mindspore.float32)
554        >>> labels_np = np.array([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]]).astype(np.float32)
555        >>> labels = Tensor(labels_np)
556        >>> output = loss(logits, labels)
557        >>> print(output)
558        [30.]
559    """
560    def __init__(self,
561                 sparse=False,
562                 reduction='none'):
563        """Initialize SoftmaxCrossEntropyWithLogits."""
564        super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction)
565        self.sparse = validator.check_bool(sparse, "sparse")
566        self.reduction = reduction
567        self.softmax_cross_entropy = P.SoftmaxCrossEntropyWithLogits()
568        self.one_hot = P.OneHot()
569        self.on_value = Tensor(1.0, mstype.float32)
570        self.off_value = Tensor(0., mstype.float32)
571        self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"]
572        self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits()
573
574    def construct(self, logits, labels):
575        _check_is_tensor('logits', logits, self.cls_name)
576        _check_is_tensor('labels', labels, self.cls_name)
577        if self.sparse:
578            if self.reduction == 'mean':
579                x = self.sparse_softmax_cross_entropy(logits, labels)
580                return x
581            labels = self.one_hot(labels, F.shape(logits)[-1], self.on_value, self.off_value)
582        x = self.softmax_cross_entropy(logits, labels)[0]
583        return self.get_loss(x)
584
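# A NumPy sketch of the per-sample cross-entropy formula above, assuming dense one-hot
# labels (the sparse path first converts class indices to one-hot). The helper name
# `_softmax_xent_reference` is illustrative only. For doctest case 1, the dominant
# logit 72 makes logsumexp ~= 72, so the loss for class 1 is ~= 72 - 5 = 67.
def _softmax_xent_reference(logits, onehot_labels):
    """Return -sum(labels * log_softmax(logits)) for each row."""
    import numpy as np
    x = np.asarray(logits, np.float64)
    shifted = x - x.max(axis=-1, keepdims=True)  # improves numerical stability
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    return -(np.asarray(onehot_labels, np.float64) * log_softmax).sum(axis=-1)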
585
586@constexpr
587def _check_label_dtype(labels_dtype, cls_name):
588    """Internal function, used to check whether the data type of labels meets the requirements."""
589    validator.check_type_name("labels", labels_dtype, [mstype.int32, mstype.int64], cls_name)
590
591
592class DiceLoss(LossBase):
593    r"""
594    The Dice coefficient is a set-similarity measure used to calculate the similarity between two samples. Its
595    value is 1 when the segmentation result is the best and 0 when the segmentation result is the worst. The
596    Dice coefficient indicates the ratio of the overlapping area between two objects to the total area.
597    The function is shown as follows:
598
599    .. math::
600        dice\_loss = 1 - \frac{2 * (pred \bigcap true)}{pred \bigcup true}
601
602    Args:
603        smooth (float): A term added to the denominator to improve numerical stability. Should be greater than 0.
604                        Default: 1e-5.
605
606    Inputs:
607        - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means any number of
608          additional dimensions. The data type must be float16 or float32.
609        - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, same shape as the `logits`.
610          The data type must be float16 or float32.
611
612    Outputs:
613        Tensor, the computed Dice loss, a scalar tensor.
614
615    Raises:
616        ValueError: If the dimension of `logits` is different from `labels`.
617        TypeError: If the type of `logits` or `labels` is not a tensor.
618
619    Supported Platforms:
620        ``Ascend`` ``GPU`` ``CPU``
621
622    Examples:
623        >>> loss = nn.DiceLoss(smooth=1e-5)
624        >>> logits = Tensor(np.array([[0.2, 0.5], [0.3, 0.1], [0.9, 0.6]]), mstype.float32)
625        >>> labels = Tensor(np.array([[0, 1], [1, 0], [0, 1]]), mstype.float32)
626        >>> output = loss(logits, labels)
627        >>> print(output)
628        0.38596618
629    """
630    def __init__(self, smooth=1e-5):
631        """Initialize DiceLoss."""
632        super(DiceLoss, self).__init__()
633        self.smooth = validator.check_positive_float(smooth, "smooth")
634        self.reshape = P.Reshape()
635
636    def construct(self, logits, label):
637        _check_is_tensor('logits', logits, self.cls_name)
638        _check_is_tensor('labels', label, self.cls_name)
639        _check_shape(logits.shape, label.shape, self.cls_name)
640        intersection = self.reduce_sum(self.mul(logits.view(-1), label.view(-1)))
641        unionset = self.reduce_sum(self.mul(logits.view(-1), logits.view(-1))) + \
642                   self.reduce_sum(self.mul(label.view(-1), label.view(-1)))
643
644        single_dice_coeff = (2 * intersection) / (unionset + self.smooth)
645        dice_loss = 1 - single_dice_coeff
646
647        return dice_loss
648
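# A NumPy sketch of the soft Dice loss computed in `construct` above: sum(p * t) as
# the intersection, sum(p * p) + sum(t * t) as the union term, and `smooth` added to
# the denominator. `_dice_loss_reference` is an illustrative helper, not part of this
# module; for the doctest inputs it reproduces the printed value 0.38596618.
def _dice_loss_reference(logits, labels, smooth=1e-5):
    """Dice loss computed directly from the formula used in DiceLoss.construct."""
    import numpy as np
    p = np.asarray(logits, np.float32).reshape(-1)
    t = np.asarray(labels, np.float32).reshape(-1)
    intersection = (p * t).sum()
    union = (p * p).sum() + (t * t).sum()
    return 1.0 - 2.0 * intersection / (union + smooth)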
649
650@constexpr
651def _check_shape(logits_shape, label_shape, prim_name=None):
652    """Internal function, used to check whether the shape of logits and labels meets the requirements."""
653    validator.check('logits_shape', logits_shape, 'label_shape', label_shape, prim_name=prim_name)
654
655
656@constexpr
657def _check_ndim_multi(logits_dim, label_dim, prim_name=None):
658    """Internal function, used to check whether the dimension of logits and label meets the requirements."""
659    msg_prefix = f'For \'{prim_name}\', the' if prim_name else "The"
660    if logits_dim < 2:
661        raise ValueError(f"{msg_prefix} logits dimension should be greater than 1, but got {logits_dim}.")
662    if label_dim < 2:
663        raise ValueError(f"{msg_prefix} label dimension should be greater than 1, but got {label_dim}.")
664
665
666@constexpr
667def _check_weights(weight_shape, label_shape, prim_name=None):
668    """Internal function, used to check whether the reduced shape meets the requirements."""
669    msg_prefix = f'For \'{prim_name}\', the' if prim_name else "The"
670    if weight_shape != label_shape:
671        raise ValueError(f"{msg_prefix} weight_shape[0] should be equal to label_shape[1], "
672                         f"but got weight_shape[0]: {weight_shape} and label_shape[1]: {label_shape}.")
673
674
675class MultiClassDiceLoss(LossBase):
676    r"""
677    When there are multiple classes, the label is transformed into multiple binary labels by one-hot encoding.
678    Each channel can then be regarded as a binary classification problem, so the loss is obtained by computing
679    the binary Dice loss for each category and then taking the average value.
680
681    Args:
682        weights (Union[Tensor, None]): Tensor of shape :math:`(num\_classes, dim)`. The weight shape[0] should be
683            equal to labels shape[1].
684        ignore_indiex (Union[int, None]): Class index to ignore.
685        activation (Union[str, Cell]): Activation function applied to the output of the fully connected layer, e.g. 'ReLU'.
686            Default: 'softmax'. Choose from: ['softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'sigmoid'].
687
688    Inputs:
689        - **logits** (Tensor) - Tensor of shape :math:`(N, C, *)` where :math:`*` means any number of additional
690          dimensions. The logits dimension should be greater than 1. The data type must be float16 or float32.
691        - **labels** (Tensor) - Tensor of shape :math:`(N, C, *)`, same shape as the `logits`.
692          The labels dimension should be greater than 1. The data type must be float16 or float32.
693
694    Outputs:
695        Tensor, the computed multi-class Dice loss, a scalar tensor.
696
697    Raises:
698        ValueError: If the shape of `logits` is different from `labels`.
699        TypeError: If the type of `logits` or `labels` is not a tensor.
700        ValueError: If the dimension of `logits` or `labels` is less than 2.
701        ValueError: If the weights.shape[0] is not equal to labels.shape[1].
702        ValueError: If `weights` is a tensor, but its dimension is not 2.
703
704    Supported Platforms:
705        ``Ascend`` ``GPU`` ``CPU``
706
707    Examples:
708        >>> loss = nn.MultiClassDiceLoss(weights=None, ignore_indiex=None, activation="softmax")
709        >>> logits = Tensor(np.array([[0.2, 0.5, 0.7], [0.3, 0.1, 0.5], [0.9, 0.6, 0.3]]), mstype.float32)
710        >>> labels = Tensor(np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]]), mstype.float32)
711        >>> output = loss(logits, labels)
712        >>> print(output)
713        0.54958105
714    """
715    def __init__(self, weights=None, ignore_indiex=None, activation="softmax"):
716        """Initialize MultiClassDiceLoss."""
717        super(MultiClassDiceLoss, self).__init__()
718        activation_list = ['softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'sigmoid']
719
720        self.binarydiceloss = DiceLoss(smooth=1e-5)
721        self.weights = weights if weights is None else validator.check_value_type("weights", weights, [Tensor])
722        if isinstance(self.weights, Tensor) and self.weights.ndim != 2:
723            raise ValueError(f"For '{self.cls_name}', the dimension of 'weights' should be 2, "
724                             f"but got {self.weights.ndim}.")
725        self.ignore_indiex = ignore_indiex if ignore_indiex is None else \
726            validator.check_value_type("ignore_indiex", ignore_indiex, [int])
727        if isinstance(activation, str) and activation not in activation_list:
728            raise ValueError(f"For '{self.cls_name}', the 'activation' must be in {activation_list}, "
729                             f"but got {activation}.")
730
731        self.activation = get_activation(activation) if isinstance(activation, str) else activation
732        if self.activation is not None and not isinstance(self.activation, Cell):
733            raise TypeError(f"For '{self.cls_name}', the 'activation' must be str or Cell, "
734                            f"but got {type(self.activation)}.")
735        self.reshape = P.Reshape()
736
737    def construct(self, logits, label):
738        _check_is_tensor('logits', logits, self.cls_name)
739        _check_is_tensor('labels', label, self.cls_name)
740        _check_shape(logits.shape, label.shape, self.cls_name)
741        _check_ndim_multi(logits.ndim, label.ndim, self.cls_name)
742        total_loss = 0
743
744        if self.activation is not None:
745            logits = self.activation(logits)
746
747        for i in range(label.shape[1]):
748            if i != self.ignore_indiex:
749                dice_loss = self.binarydiceloss(logits[:, i], label[:, i])
750                if self.weights is not None:
751                    _check_weights(self.weights.shape[0], label.shape[1], self.cls_name)
752                    dice_loss *= self.weights[i]
753                total_loss += dice_loss
754
755        return total_loss/label.shape[1]
756
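# A NumPy sketch of the per-channel reduction performed in `construct` above: compute
# the binary soft Dice loss for every channel except `ignore_indiex`, apply the
# optional weight, and divide the accumulated sum by the number of channels.
# `_multiclass_dice_reference` is illustrative only; it assumes `probs` are the
# already-activated logits and that `weights` is a 1-D per-channel vector here.
def _multiclass_dice_reference(probs, onehot_labels, weights=None, ignore_index=None, smooth=1e-5):
    import numpy as np
    probs = np.asarray(probs, np.float32)
    onehot_labels = np.asarray(onehot_labels, np.float32)
    num_channels = onehot_labels.shape[1]
    total = 0.0
    for i in range(num_channels):
        if i == ignore_index:
            continue
        p, t = probs[:, i].ravel(), onehot_labels[:, i].ravel()
        dice = 1.0 - 2.0 * (p * t).sum() / ((p * p).sum() + (t * t).sum() + smooth)
        if weights is not None:
            dice *= float(np.asarray(weights, np.float32)[i])
        total += dice
    return total / num_channels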
757
758class SampledSoftmaxLoss(LossBase):
759    r"""
760    Computes the sampled softmax training loss. This operator can accelerate the training of the softmax classifier
761    over a large number of classes. It is generally an underestimate of the full softmax loss.
762
763    Args:
764        num_sampled (int): The number of classes to randomly sample per batch.
765        num_classes (int): The number of possible classes.
766        num_true (int): The number of labels classes per training example. Default: 1.
767        sampled_values (Union[list, tuple]): List or tuple of (`sampled_candidates`, `true_expected_count`,
768            `sampled_expected_count`) returned by a `*CandidateSampler` function.
769            Defaults to None, in which case `UniformCandidateSampler` is applied.
770        remove_accidental_hits (bool): Whether to remove "accidental hits"
771            where a sampled class equals one of the label classes. Default: True.
772        seed (int): Random seed for candidate sampling. Default: 0.
773        reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
774            If "none", do not perform reduction. Default: "none".
775
776    Inputs:
777        - **weights** (Tensor) - Tensor of shape :math:`(C, dim)`.
778        - **bias** (Tensor) - Tensor of shape :math:`(C,)`. The class biases.
779        - **labels** (Tensor) - Tensor of shape :math:`(N, num\_true)`, type `int64, int32`. The labels classes.
780        - **logits** (Tensor) - Tensor of shape :math:`(N, dim)`. The forward activations of the input network.
781
782    Outputs:
783        Tensor or Scalar, if `reduction` is 'none', then output is a tensor with shape :math:`(N,)`.
784        Otherwise, the output is a scalar.
785
786    Raises:
787        TypeError: If `sampled_values` is not a list or tuple.
788        TypeError: If dtype of `labels` is neither int32 nor int64.
789        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
790        ValueError: If `num_sampled` or `num_true` is greater than `num_classes`.
791        ValueError: If length of `sampled_values` is not equal to 3.
792
793    Supported Platforms:
794        ``GPU``
795
796    Examples:
797        >>> mindspore.set_seed(1)
798        >>> loss = nn.SampledSoftmaxLoss(num_sampled=4, num_classes=7, num_true=1)
799        >>> weights = Tensor(np.random.randint(0, 9, [7, 10]), mindspore.float32)
800        >>> biases = Tensor(np.random.randint(0, 9, [7]), mindspore.float32)
801        >>> labels = Tensor([0, 1, 2])
802        >>> logits = Tensor(np.random.randint(0, 9, [3, 10]), mindspore.float32)
803        >>> output = loss(weights, biases, labels, logits)
804        >>> print(output)
805        [4.6051701e+01 1.4000047e+01 6.1989022e-06]
806    """
807
808    def __init__(self, num_sampled, num_classes, num_true=1,
809                 sampled_values=None, remove_accidental_hits=True, seed=0,
810                 reduction='none'):
811        """Initialize SampledSoftmaxLoss."""
812        super(SampledSoftmaxLoss, self).__init__(reduction)
813
814        if num_true < 1:
815            raise ValueError(f"For '{self.cls_name}', the 'num_true' must be greater than or equal to 1, "
816                             f"but got {num_true}.")
817        if seed < 0:
818            raise ValueError(f"For '{self.cls_name}', the 'seed' must be greater than or equal to 0, but got {seed}.")
819        if num_sampled > num_classes:
820            raise ValueError(f"For '{self.cls_name}', the 'num_sampled' must be smaller than or "
821                             f"equal to 'num_classes', but got 'num_sampled': {num_sampled} "
822                             f"and 'num_classes': {num_classes}.")
823        if num_true > num_classes:
824            raise ValueError(f"For '{self.cls_name}', the 'num_true' must be smaller than or equal to 'num_classes', "
825                             f"but got 'num_true': {num_true} and 'num_classes': {num_classes}.")
826        if sampled_values is not None:
827            if not isinstance(sampled_values, (list, tuple)):
828                raise TypeError(f"For '{self.cls_name}', the type of 'sampled_values' must be a list or tuple, "
829                                f"but got {type(sampled_values).__name__}.")
830            if len(sampled_values) != 3:
831                raise ValueError(f"For '{self.cls_name}', the length of 'sampled_values' must be equal to 3, "
832                                 f"but got {len(sampled_values)}.")
833
834        self.num_sampled = num_sampled
835        self.num_classes = num_classes
836        self.num_true = num_true
837        self.sampled_values = sampled_values
838        self.remove_accidental_hits = remove_accidental_hits
839        self.seed = seed
840        self.sampler = P.UniformCandidateSampler(
841            num_true,
842            num_sampled,
843            True,
844            num_classes,
845            seed,
846            remove_accidental_hits)
847        self.cast = P.Cast()
848        self.reshape = P.Reshape()
849        self.shape = P.Shape()
850        self.exp = P.Exp()
851        self.log = P.Log()
852        self.slice_op = P.Slice()
853        self.matmul = P.MatMul(False, True)
854        self.gather_v2 = P.Gather()
855        self.reduce_max_true = P.ReduceMax(True)
856        self.reduce_sum = P.ReduceSum()
857        self.reduce_sum_true = P.ReduceSum(True)
858        self.concat_dim0 = P.Concat(0)
859        self.concat_dim1 = P.Concat(1)
860        self.ones_like = P.OnesLike()
861        self.zeros_like = P.ZerosLike()
862        self.mul = P.Mul()
863        self.expand_dims = P.ExpandDims()
864        self.dtype = P.DType()
865
866    def construct(self, weights, biases, labels, logits):
867        _check_is_tensor('weights', weights, self.cls_name)
868        _check_is_tensor('biases', biases, self.cls_name)
869        _check_is_tensor('labels', labels, self.cls_name)
870        _check_is_tensor('logits', logits, self.cls_name)
871        _check_label_dtype(self.dtype(labels), self.cls_name)
872
873        logits, labels = self._compute_sampled_logits(
874            weights=weights,
875            biases=biases,
876            labels=labels,
877            logits=logits,
878            num_true=self.num_true,
879            sampled_values=self.sampled_values,
880            subtract_log_q=True)
881
882        x = self._softmax_cross_entropy(logits, labels)
883        return x
884
885    def _softmax_cross_entropy(self, logits, targets):
886        stable_exp_logits = self.exp(logits - self.reduce_max_true(logits, 1))
887        pred = stable_exp_logits / self.reduce_sum_true(stable_exp_logits, 1)
888        return -self.reduce_sum(targets * self.log(pred + 1.0e-20), 1)
889
890    def _compute_sampled_logits(self, weights,
891                                biases,
892                                labels,
893                                logits,
894                                num_true=1,
895                                sampled_values=None,
896                                subtract_log_q=True):
897        """Helper function for SampledSoftmaxLoss functions.
898
899        Computes sampled output training logits and labels suitable for sampled softmax training.
900
901        Note: In the case where num_true > 1, we assign to each label class
902        the label probability (1/num_true) so that the label probabilities
903        sum to 1 per example.
904
905        Args:
906            weights (Tensor): Tensor of shape `[num_classes, dim]`.
907            biases (Tensor): Tensor of shape `[num_classes]`.
908            labels (Tensor): Tensor of shape `[batch_size, num_true]`. The labels classes.
909            logits (Tensor): Tensor of shape `[batch_size, dim]`. The forward
910                activations of the input network.
911            num_true (int): The number of labels classes per training example.
912            sampled_values: A tuple of (`sampled_candidates`, `true_expected_count`,
913                `sampled_expected_count`) returned by a `UniformCandidateSampler` function.
914            subtract_log_q: A `bool`. Whether to subtract the log expected count of
915                the labels in the sample to get the logits of the true labels. Default: True.
916        Returns:
917            out_logits: `Tensor` object with shape
918                `[batch_size, num_true + num_sampled]`
919            out_labels: A tensor object with the same shape as `out_logits`.
920        """
921
922        if not labels.dtype == mstype.int32:
923            labels = self.cast(labels, mstype.int32)
924        labels = self.reshape(labels, (-1, num_true))
925        labels_flat = self.reshape(labels, (-1,))
926
927        # Sample the negative labels.
928        #   sampled shape: [num_sampled] tensor
929        #   true_expected_count shape is [batch_size, 1] tensor
930        #   sampled_expected_count shape is [num_sampled] tensor
931        if sampled_values is None:
932            sampled_values = self.sampler(labels)
933
934        (sampled, true_expected_count, sampled_expected_count) = sampled_values
935
936        if not sampled.dtype == mstype.int32:
937            sampled = self.cast(sampled, mstype.int32)
938        all_ids = self.concat_dim0((labels_flat, sampled))
939        all_w = self.gather_v2(weights, all_ids, 0)
940
941        n_true = self.shape(labels_flat)[0]
942        n_sampled = self.shape(sampled)[0]
943        n_dim = self.shape(all_w)[1]
944
945        true_w = self.slice_op(all_w, [0, 0], [n_true, n_dim])
946        sampled_w = self.slice_op(all_w, [n_true, 0], [n_sampled, n_dim])
947        sampled_logits = self.matmul(logits, sampled_w)
948
949        all_b = self.gather_v2(biases, all_ids, 0)
950        true_b = self.slice_op(all_b, [0], [n_true])
951        sampled_b = self.slice_op(all_b, [n_true], [n_sampled])
952
953        new_true_w_shape = (-1, num_true, n_dim)
954        row_wise_dots = self.mul(self.expand_dims(logits, 1),
955                                 self.reshape(true_w, new_true_w_shape))
956
957        # We want the row-wise dot plus biases which yields a
958        # [batch_size, num_true] tensor of true_logits.
959        dots_as_matrix = self.reshape(row_wise_dots, (-1, n_dim))
960        true_logits = self.reshape(self.reduce_sum(dots_as_matrix, 1), (-1, num_true))
961        true_b = self.reshape(true_b, (-1, num_true))
962        true_logits += true_b
963        sampled_logits += sampled_b
964
965        if subtract_log_q:
966            # Subtract log of Q(l), prior probability that l appears in sampled.
967            true_logits -= self.log(true_expected_count)
968            sampled_logits -= self.log(sampled_expected_count)
969
970        # Construct output logits and labels. The true labels/logits start at col 0.
971        out_logits = self.concat_dim1((true_logits, sampled_logits))
972
973        # true_logits is a float tensor, ones_like(true_logits) is a float
974        # tensor of ones. We then divide by num_true to ensure the per-example
975        # labels sum to 1.0, i.e. form a proper probability distribution.
976        out_labels = self.concat_dim1((
977            self.ones_like(true_logits) / num_true,
978            self.zeros_like(sampled_logits)
979        ))
980        return out_logits, out_labels
981
982
983class BCELoss(LossBase):
984    r"""
985    BCELoss creates a criterion to measure the binary cross entropy between the true labels and predicted labels.
986
987    Set the predicted labels as :math:`x`, true labels as :math:`y`, the output loss as :math:`\ell(x, y)`.
988    Let,
989
990    .. math::
991        L = \{l_1,\dots,l_N\}^\top, \quad
992        l_n = - w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right]
993
994    Then,
995
996    .. math::
997        \ell(x, y) = \begin{cases}
998        L, & \text{if reduction} = \text{'none';}\\
999        \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
1000        \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
1001        \end{cases}
1002
1003    Note:
1004        Note that the predicted labels should always be the output of sigmoid and the true labels should be numbers
1005        between 0 and 1.
1006
1007    Args:
1008        weight (Tensor, optional): A rescaling weight applied to the loss of each batch element.
1009            It must have the same shape and data type as `logits`. Default: None.
1010        reduction (str): Specifies the reduction to be applied to the output.
1011            Its value must be one of 'none', 'mean', 'sum'. Default: 'none'.
1012
1013    Inputs:
1014        - **logits** (Tensor) - The input tensor with shape :math:`(N, *)` where :math:`*` means any number
1015          of additional dimensions. The data type must be float16 or float32.
1016        - **labels** (Tensor) - The label tensor with shape :math:`(N, *)`, the same shape and data type as `logits`.
1017
1018    Outputs:
1019        Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `logits`.
1020        Otherwise, the output is a scalar.
1021
1022    Raises:
1023        TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
1024        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
1025        ValueError: If shape of `logits` is not the same as `labels` or `weight` (if given).
1026
1027    Supported Platforms:
1028        ``Ascend`` ``GPU`` ``CPU``
1029
1030    Examples:
1031        >>> weight = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 3.3, 2.2]]), mindspore.float32)
1032        >>> loss = nn.BCELoss(weight=weight, reduction='mean')
1033        >>> logits = Tensor(np.array([[0.1, 0.2, 0.3], [0.5, 0.7, 0.9]]), mindspore.float32)
1034        >>> labels = Tensor(np.array([[0, 1, 0], [0, 0, 1]]), mindspore.float32)
1035        >>> output = loss(logits, labels)
1036        >>> print(output)
1037        1.8952923
1038    """
1039
1040    def __init__(self, weight=None, reduction='none'):
1041        """Initialize BCELoss."""
1042        super(BCELoss, self).__init__()
1043        self.binary_cross_entropy = P.BinaryCrossEntropy(reduction=reduction)
1044        self.weight_one = weight is None
1045        if not self.weight_one:
1046            self.weight = weight
1047        else:
1048            self.ones = P.OnesLike()
1049
1050    def construct(self, logits, labels):
1051        _check_is_tensor('logits', logits, self.cls_name)
1052        _check_is_tensor('labels', labels, self.cls_name)
1053        if self.weight_one:
1054            weight = self.ones(logits)
1055        else:
1056            weight = self.weight
1057        loss = self.binary_cross_entropy(logits, labels, weight)
1058        return loss
1059
1060
1061@constexpr
1062def _check_reduced_shape_valid(ori_shape, reduced_shape, axis, cls_name):
1063    """Internal function, used to check whether the reduced shape meets the requirements."""
1064    validator.check_reduce_shape(ori_shape, reduced_shape, axis, cls_name)
1065
1066
1067class CosineEmbeddingLoss(LossBase):
1068    r"""
1069    CosineEmbeddingLoss creates a criterion to measure the similarity between two tensors using cosine distance.
1070
1071    Given two tensors :math:`x1`, :math:`x2`, and a Tensor label :math:`y` with values 1 or -1:
1072
1073    .. math::
1074        loss(x_1, x_2, y) = \begin{cases}
1075        1-cos(x_1, x_2), & \text{if } y = 1\\
1076        max(0, cos(x_1, x_2)-margin), & \text{if } y = -1\\
1077        \end{cases}
1078
1079    Args:
1080        margin (float): Should be in [-1.0, 1.0]. Default 0.0.
1081        reduction (str): Specifies which reduction to be applied to the output. It must be one of
1082          "none", "mean", and "sum", meaning no reduction, reduce mean and sum on output, respectively. Default "mean".
1083
1084    Inputs:
1085        - **logits_x1** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means, any number
1086          of additional dimensions.
1087        - **logits_x2** (Tensor) - Tensor of shape :math:`(N, *)`, same shape and dtype as `logits_x1`.
1088        - **labels** (Tensor) - Contains value 1 or -1. Suppose the shape of `logits_x1` is
1089          :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `labels` must be :math:`(x_1, x_3, x_4, ..., x_R)`.
1090
1091    Outputs:
1092        Tensor or Scalar, if `reduction` is "none", its shape is the same as `labels`.
1093        Otherwise, a scalar value will be returned.
1094
1095    Raises:
1096        TypeError: If `margin` is not a float.
1097        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
1098        ValueError: If `margin` is not in range [-1, 1].
1099
1100    Supported Platforms:
1101        ``Ascend`` ``GPU`` ``CPU``
1102
1103    Examples:
1104        >>> logits_x1 = Tensor(np.array([[0.3, 0.8], [0.4, 0.3]]), mindspore.float32)
1105        >>> logits_x2 = Tensor(np.array([[0.4, 1.2], [-0.4, -0.9]]), mindspore.float32)
1106        >>> labels = Tensor(np.array([1, -1]), mindspore.int32)
1107        >>> cosine_embedding_loss = nn.CosineEmbeddingLoss()
1108        >>> output = cosine_embedding_loss(logits_x1, logits_x2, labels)
1109        >>> print(output)
1110        0.0003425479
1111    """
1112    def __init__(self, margin=0.0, reduction="mean"):
1113        """Initialize CosineEmbeddingLoss."""
1114        super(CosineEmbeddingLoss, self).__init__(reduction)
1115        self.reduce_sum = P.ReduceSum()
1116        self.maximum = P.Maximum()
1117        validator.check_value_type("margin", margin, [float], self.cls_name)
1118        self.margin = validator.check_float_range(margin, -1.0, 1.0, Rel.INC_BOTH, "margin", self.cls_name)
1119
1120    def construct(self, logits_x1, logits_x2, labels):
1121        _check_is_tensor('logits_x1', logits_x1, self.cls_name)
1122        _check_is_tensor('logits_x2', logits_x2, self.cls_name)
1123        _check_is_tensor('labels', labels, self.cls_name)
1124        F.same_type_shape(logits_x1, logits_x2)
1125        _check_reduced_shape_valid(F.shape(logits_x1), F.shape(labels), (1,), self.cls_name)
1126        # if labels > 0, 1-cosine(logits_x1, logits_x2)
1127        # else, max(0, cosine(logits_x1, logits_x2)-margin)
1128        prod_sum = self.reduce_sum(logits_x1 * logits_x2, (1,))
1129        square1 = self.reduce_sum(F.square(logits_x1), (1,))
1130        square2 = self.reduce_sum(F.square(logits_x2), (1,))
1131        denom = F.sqrt(square1) * F.sqrt(square2)
1132        cosine = prod_sum / denom
1133
1134        pos_value = 1.0 - cosine
1135        neg_value = self.maximum(cosine - self.margin, 0.0)
1136        zeros = F.zeros_like(cosine)
1137        pos_part = F.select(labels == 1, pos_value, zeros)
1138        neg_part = F.select(labels == -1, neg_value, zeros)
1139        output_unreduced = pos_part + neg_part
1140
1141        return self.get_loss(output_unreduced)
1142
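# A NumPy sketch of the cosine embedding loss defined above, assuming 1 or -1 labels
# and 'mean' reduction over the batch. `_cosine_embedding_reference` is an
# illustrative helper, not part of the public API; for the doctest inputs it
# reproduces the printed value 0.0003425479 up to float precision.
def _cosine_embedding_reference(x1, x2, labels, margin=0.0):
    import numpy as np
    x1 = np.asarray(x1, np.float32)
    x2 = np.asarray(x2, np.float32)
    labels = np.asarray(labels)
    cosine = (x1 * x2).sum(axis=1) / (np.linalg.norm(x1, axis=1) * np.linalg.norm(x2, axis=1))
    per_sample = np.where(labels == 1, 1.0 - cosine, np.maximum(cosine - margin, 0.0))
    return per_sample.mean()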
1143
1144class BCEWithLogitsLoss(LossBase):
1145    r"""
1146    Adds sigmoid activation function to input logits, and uses the given logits to compute binary cross entropy
1147    between the logits and the labels.
1148
1149    Sets input `logits` as :math:`X`, input `labels` as :math:`Y`, output as :math:`L`. Then,
1150
1151    .. math::
1152        p_{ij} = sigmoid(X_{ij}) = \frac{1}{1 + e^{-X_{ij}}}
1153
1154    .. math::
1155        L_{ij} = -[Y_{ij} \cdot log(p_{ij}) + (1 - Y_{ij}) \cdot log(1 - p_{ij})]
1156
1157    Then,
1158
1159    .. math::
1160        \ell(x, y) = \begin{cases}
1161        L, & \text{if reduction} = \text{'none';}\\
1162        \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\
1163        \operatorname{sum}(L),  & \text{if reduction} = \text{'sum'.}
1164        \end{cases}
1165
1166    Args:
1167        reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none'.
1168            If 'none', do not perform reduction. Default:'mean'.
1169        weight (Tensor, optional): A rescaling weight applied to the loss of each batch element.
1170            If not None, it can be broadcast to a tensor with shape of `logits`,
1171            data type must be float16 or float32. Default: None.
1172        pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
1173            number of classes. If not None, it must be broadcastable to a tensor with the shape of `logits`,
1174            and its data type must be float16 or float32. Default: None.
1175
1176    Inputs:
1177        - **logits** (Tensor) - Input logits with shape :math:`(N, *)` where :math:`*` means any number
1178          of additional dimensions. The data type must be float16 or float32.
1179        - **labels** (Tensor) - Ground truth label with shape :math:`(N, *)`, same shape and dtype as `logits`.
1180
1181    Outputs:
1182        Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`.
1183        Otherwise, a scalar value will be returned.
1184
1185    Raises:
1186        TypeError: If data type of `logits` or `labels` is neither float16 nor float32.
1187        TypeError: If `weight` or `pos_weight` is a Parameter.
1188        TypeError: If data type of `weight` or `pos_weight` is neither float16 nor float32.
1189        ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
1190        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
1191
1192    Supported Platforms:
1193        ``Ascend``  ``GPU``
1194
1195    Examples:
1196        >>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]).astype(np.float32))
1197        >>> labels = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]).astype(np.float32))
1198        >>> loss = nn.BCEWithLogitsLoss()
1199        >>> output = loss(logits, labels)
1200        >>> print(output)
1201        0.3463612
    """

    def __init__(self, reduction='mean', weight=None, pos_weight=None):
        """Initialize BCEWithLogitsLoss."""
        super(BCEWithLogitsLoss, self).__init__()
        self.bce_with_logits_loss = P.BCEWithLogitsLoss(reduction=reduction)
        if isinstance(weight, Parameter):
            raise TypeError(f"For '{self.cls_name}', the 'weight' can not be a Parameter.")
        if isinstance(pos_weight, Parameter):
            raise TypeError(f"For '{self.cls_name}', the 'pos_weight' can not be a Parameter.")
        self.weight = weight
        self.pos_weight = pos_weight
        self.ones = P.OnesLike()

    def construct(self, logits, labels):
        _check_is_tensor('logits', logits, self.cls_name)
        _check_is_tensor('labels', labels, self.cls_name)
        # When `weight` or `pos_weight` is not given, fall back to an all-ones tensor,
        # which leaves the corresponding term of the loss unscaled.
        ones_input = self.ones(logits)
        if self.weight is not None:
            weight = self.weight
        else:
            weight = ones_input
        if self.pos_weight is not None:
            pos_weight = self.pos_weight
        else:
            pos_weight = ones_input
        loss = self.bce_with_logits_loss(logits, labels, weight, pos_weight)
        return loss


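# A minimal NumPy sketch, not part of the public API, illustrating the weighted
# BCE-with-logits formula documented above. `_bce_with_logits_reference` is a
# hypothetical helper added purely for illustration; it assumes `pos_weight` scales the
# positive term and `weight` rescales every element, and with the default weights it
# reproduces the docstring example value (0.3463612) up to float precision.
def _bce_with_logits_reference(logits, labels, weight=1.0, pos_weight=1.0, reduction='mean'):
    """Illustrative NumPy reference for the BCEWithLogitsLoss math above."""
    import numpy as np

    logits = np.asarray(logits, dtype=np.float32)
    labels = np.asarray(labels, dtype=np.float32)
    probs = 1.0 / (1.0 + np.exp(-logits))  # sigmoid(logits)
    # -[pos_weight * Y_ij * log(p_ij) + (1 - Y_ij) * log(1 - p_ij)], rescaled element-wise by `weight`
    per_element = -(pos_weight * labels * np.log(probs) + (1.0 - labels) * np.log(1.0 - probs))
    per_element = weight * per_element
    if reduction == 'mean':
        return per_element.mean()
    if reduction == 'sum':
        return per_element.sum()
    return per_element

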
@constexpr
def _check_ndim(logits_ndim, labels_ndim, prime_name=None):
    '''Internal function, used to check whether the dimensions of logits and labels meet the requirements.'''
    msg_prefix = f'For \'{prime_name}\', the' if prime_name else "The"
    if logits_ndim < 2 or logits_ndim > 4:
        raise ValueError(f"{msg_prefix} dimensions of 'logits' should be in [2, 4], but got "
                         f"dimension of 'logits' {logits_ndim}.")
    if labels_ndim < 2 or labels_ndim > 4:
        raise ValueError(f"{msg_prefix} dimensions of 'labels' should be in [2, 4], but got "
                         f"dimension of 'labels' {labels_ndim}.")
    if logits_ndim != labels_ndim:
        raise ValueError(f"{msg_prefix} dimensions of 'logits' and 'labels' must be equal, but got "
                         f"dimension of 'logits' {logits_ndim} and dimension of 'labels' {labels_ndim}.")


@constexpr
def _check_channel_and_shape(logits, labels, prime_name=None):
    '''Internal function, used to check whether the channels or shape of logits and labels meet the requirements.'''
    msg_prefix = f'For \'{prime_name}\', the' if prime_name else "The"
    if logits == 1:
        raise ValueError(f"{msg_prefix} single channel prediction is not supported, but got {logits}.")
    if labels not in (1, logits):
        raise ValueError(f"{msg_prefix} channel of 'labels' must be 1 or the same as the channel of 'logits'. "
                         f"If there is only one channel, its value should be in the range [0, C-1], "
                         f"where C is the number of classes "
                         f"inferred from 'logits': C={logits}, but got 'labels': {labels}.")


@constexpr
def _check_input_dtype(labels_dtype, cls_name):
    """Internal function, used to check whether the data type of labels meets the requirements."""
    validator.check_type_name("labels", labels_dtype,
                              [mstype.int32, mstype.int64, mstype.float16, mstype.float32], cls_name)


class FocalLoss(LossBase):
    r"""
    The loss function proposed by Kaiming He's team in their paper
    `Focal Loss for Dense Object Detection <https://arxiv.org/pdf/1708.02002.pdf>`_ improves the
    performance of image object detection. It is a loss function designed to address class imbalance
    and the varying difficulty of classification. If you want to learn more, please refer to the paper.
    The function is shown as follows:

    .. math::
        FL(p_t) = -(1-p_t)^\gamma log(p_t)

    Args:
        gamma (float): Gamma is used to adjust the steepness of the weight curve in focal loss. Default: 2.0.
        weight (Union[Tensor, None]): A rescaling weight applied to the loss of each batch element. The dimension of
                                      `weight` should be 1. If None, no weight is applied. Default: None.
        reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
                         If "none", do not perform reduction. Default: "mean".

    Inputs:
        - **logits** (Tensor) - Tensor of shape :math:`(B, C)`, :math:`(B, C, H)`, or :math:`(B, C, H, W)`,
          where :math:`C` is the number of classes and must be greater than 1. If the shape is :math:`(B, C, H)`
          or :math:`(B, C, H, W)`, the :math:`H` or the product of :math:`H` and :math:`W` should be the same as
          that of `labels`.
        - **labels** (Tensor) - Tensor of shape :math:`(B, C)`, :math:`(B, C, H)`, or :math:`(B, C, H, W)`,
          where :math:`C` is either 1 or the same as the :math:`C` of `logits`. If :math:`C` is not 1, the shape
          of `labels` should be the same as that of `logits`. If the shape is :math:`(B, C, H)` or
          :math:`(B, C, H, W)`, the :math:`H` or the product of :math:`H` and :math:`W` should be the same as
          that of `logits`.

    Outputs:
        Tensor or Scalar. If `reduction` is "none", its shape is the same as that of `logits`.
        Otherwise, a scalar value will be returned.

    Raises:
        TypeError: If the data type of `gamma` is not a float.
        TypeError: If `weight` is not a Tensor.
        ValueError: If `labels` dim is different from `logits`.
        ValueError: If `labels` channel is not 1 and `labels` shape is different from `logits`.
        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> logits = Tensor([[0.8, 1.4], [0.5, 0.9], [1.2, 0.9]], mstype.float32)
        >>> labels = Tensor([[1], [1], [0]], mstype.int32)
        >>> focalloss = nn.FocalLoss(weight=Tensor([1, 2]), gamma=2.0, reduction='mean')
        >>> output = focalloss(logits, labels)
        >>> print(output)
        0.12516622
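        >>> # A hedged sketch (not from the original docstring): the same inputs without class
        >>> # weights and with the per-sample losses summed instead of averaged.
        >>> focalloss_sum = nn.FocalLoss(gamma=2.0, reduction='sum')
        >>> output = focalloss_sum(logits, labels)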
    """

    def __init__(self, weight=None, gamma=2.0, reduction='mean'):
        """Initialize FocalLoss."""
        super(FocalLoss, self).__init__(reduction=reduction)

        self.gamma = validator.check_value_type("gamma", gamma, [float])
        if weight is not None and not isinstance(weight, Tensor):
            raise TypeError(f"For '{self.cls_name}', the type of 'weight' should be a Tensor, "
                            f"but got {type(weight).__name__}.")
        if isinstance(weight, Tensor) and weight.ndim != 1:
            raise ValueError(f"For '{self.cls_name}', the dimension of 'weight' should be 1, but got {weight.ndim}.")
        self.weight = weight
        self.expand_dims = P.ExpandDims()
        self.gather_d = P.GatherD()
        self.squeeze = P.Squeeze(axis=1)
        self.tile = P.Tile()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.logsoftmax = nn.LogSoftmax(1)

    def construct(self, logits, labels):
        _check_is_tensor('logits', logits, self.cls_name)
        _check_is_tensor('labels', labels, self.cls_name)
        targets = labels
        _check_ndim(logits.ndim, targets.ndim)
        _check_channel_and_shape(logits.shape[1], targets.shape[1])
        _check_input_dtype(self.dtype(targets), self.cls_name)

        # Flatten any spatial dimensions into a single trailing axis so that logits and
        # labels are always handled as (B, C, N).
        if logits.ndim > 2:
            logits = logits.view(logits.shape[0], logits.shape[1], -1)
            targets = targets.view(targets.shape[0], targets.shape[1], -1)
        else:
            logits = self.expand_dims(logits, 2)
            targets = self.expand_dims(targets, 2)

        log_probability = self.logsoftmax(logits)

        # With class-index labels (channel 1), pick the log-probability of the labelled class.
        if labels.shape[1] == 1:
            log_probability = self.gather_d(log_probability, 1, self.cast(targets, mindspore.int32))
            log_probability = self.squeeze(log_probability)

        probability = F.exp(log_probability)

        if self.weight is not None:
            convert_weight = self.weight[None, :, None]
            convert_weight = self.tile(convert_weight, (targets.shape[0], 1, targets.shape[2]))
            if labels.shape[1] == 1:
                convert_weight = self.gather_d(convert_weight, 1, self.cast(targets, mindspore.int32))
                convert_weight = self.squeeze(convert_weight)
            log_probability = log_probability * convert_weight

        # Focal modulating factor (1 - p_t)^gamma from the formula in the class docstring.
        weight = F.pows(-1 * probability + 1.0, self.gamma)
        if labels.shape[1] == 1:
            loss = (-1 * weight * log_probability).mean(axis=1)
        else:
            loss = (-1 * weight * targets * log_probability).mean(axis=-1)

        return self.get_loss(loss)

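
# A minimal NumPy sketch, not part of the public API, illustrating the focal loss formula
# above for the common case of class-index labels of shape (B, 1). `_focal_loss_reference`
# is a hypothetical helper added purely for illustration; it ignores the optional `weight`
# rescaling and is not guaranteed to match FocalLoss bit-for-bit.
def _focal_loss_reference(logits, labels, gamma=2.0, reduction='mean'):
    """Illustrative NumPy reference for FL(p_t) = -(1 - p_t)^gamma * log(p_t)."""
    import numpy as np

    logits = np.asarray(logits, dtype=np.float32)
    labels = np.asarray(labels, dtype=np.int64)
    # Numerically stable log-softmax over the class dimension.
    shifted = logits - logits.max(axis=1, keepdims=True)
    log_prob = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    # Select log(p_t) of the labelled class for every sample.
    log_pt = np.take_along_axis(log_prob, labels, axis=1).squeeze(1)
    pt = np.exp(log_pt)
    loss = -((1.0 - pt) ** gamma) * log_pt
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss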