1# Copyright 2020-2021 Huawei Technologies Co., Ltd 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================ 15"""loss""" 16import mindspore 17import mindspore.common.dtype as mstype 18from mindspore import log 19from mindspore.common.tensor import Tensor 20from mindspore.common.parameter import Parameter 21from mindspore.ops import operations as P 22from mindspore.ops import functional as F 23from mindspore import nn 24from mindspore.ops.primitive import constexpr 25from mindspore.nn.cell import Cell 26from mindspore.nn.layer.activation import get_activation 27from mindspore._checkparam import Validator as validator 28from mindspore._checkparam import Rel 29from ... import context 30 31 32class LossBase(Cell): 33 """ 34 Base class for other losses. 35 36 Other losses derived from this should implement their own `construct` and use method `self.get_loss` 37 to apply reduction to loss values. 38 39 Args: 40 reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none". 41 Default: "mean". 42 43 Raises: 44 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 45 46 Supported Platforms: 47 ``Ascend`` ``GPU`` ``CPU`` 48 """ 49 def __init__(self, reduction='mean'): 50 """Initialize Loss.""" 51 super(LossBase, self).__init__() 52 53 if reduction not in ('mean', 'sum', 'none'): 54 raise ValueError(f"For '{self.cls_name}', the 'reduction' should be in ['mean', 'sum', 'none'], " 55 f"but got {reduction}.") 56 57 self.average = True 58 self.reduce = True 59 if reduction == 'sum': 60 self.average = False 61 if reduction == 'none': 62 self.reduce = False 63 64 self.reduce_mean = P.ReduceMean() 65 self.reduce_sum = P.ReduceSum() 66 self.mul = P.Mul() 67 self.cast = P.Cast() 68 69 def get_axis(self, x): 70 """ 71 Get a range of axis for input. 72 73 Args: 74 x (Tensor): Tensor of any shape. 75 """ 76 shape = F.shape(x) 77 length = F.tuple_len(shape) 78 perm = F.make_range(0, length) 79 return perm 80 81 def get_loss(self, x, weights=1.0): 82 """ 83 Computes the weighted loss. 84 85 Args: 86 x (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of 87 additional dimensions. 88 weights (Union[float, Tensor]): Optional `Tensor` whose rank is either 0, or the same rank as inputs, 89 and must be broadcastable to inputs (i.e., all dimensions must be either `1`, 90 or the same as the corresponding inputs dimension). 91 """ 92 input_dtype = x.dtype 93 x = self.cast(x, mstype.float32) 94 weights = self.cast(weights, mstype.float32) 95 x = self.mul(weights, x) 96 if self.reduce and self.average: 97 x = self.reduce_mean(x, self.get_axis(x)) 98 if self.reduce and not self.average: 99 x = self.reduce_sum(x, self.get_axis(x)) 100 x = self.cast(x, input_dtype) 101 return x 102 103 def construct(self, logits, labels): 104 raise NotImplementedError 105 106 107class _Loss(LossBase): 108 """ 109 Base class for other losses. 110 """ 111 def __init__(self, reduction='mean'): 112 """Initialize _Loss.""" 113 log.warning("'_Loss' is deprecated from version 1.3 and " 114 "will be removed in a future version, use 'LossBase' instead.") 115 super(_Loss, self).__init__(reduction) 116 117 def construct(self, logits, labels): 118 raise NotImplementedError 119 120 121@constexpr 122def _check_is_tensor(param_name, input_data, cls_name): 123 """Internal function, used to check whether the input data is Tensor.""" 124 if input_data is not None and not isinstance(F.typeof(input_data), mstype.tensor_type): 125 raise TypeError(f"For '{cls_name}', the '{param_name}' should be '{mstype.tensor_type}', " 126 f"but got '{F.typeof(input_data)}'") 127 128 129class L1Loss(LossBase): 130 r""" 131 L1Loss creates a criterion to measure the mean absolute error (MAE) between :math:`x` and :math:`y` element-wise, 132 where :math:`x` is the input Tensor and :math:`y` is the labels Tensor. 133 134 For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`, 135 the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as: 136 137 .. math:: 138 \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with } l_n = \left| x_n - y_n \right|, 139 140 where :math:`N` is the batch size. If `reduction` is not 'none', then: 141 142 .. math:: 143 \ell(x, y) = 144 \begin{cases} 145 \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\ 146 \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} 147 \end{cases} 148 149 Args: 150 reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none". 151 Default: "mean". 152 153 Inputs: 154 - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means, any number of 155 additional dimensions. 156 - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, same shape as the `logits` in common cases. 157 However, it supports the shape of `logits` is different from the shape of `labels` 158 and they should be broadcasted to each other. 159 160 Outputs: 161 Tensor, loss float tensor, the shape is zero if `reduction` is 'mean' or 'sum', 162 while the shape of output is the broadcasted shape if `reduction` is 'none'. 163 164 Raises: 165 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 166 167 Supported Platforms: 168 ``Ascend`` ``GPU`` ``CPU`` 169 170 Examples: 171 >>> # Case 1: logits.shape = labels.shape = (3,) 172 >>> loss = nn.L1Loss() 173 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 174 >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32) 175 >>> output = loss(logits, labels) 176 >>> print(output) 177 0.33333334 178 >>> # Case 2: logits.shape = (3,), labels.shape = (2, 3) 179 >>> loss = nn.L1Loss(reduction='none') 180 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 181 >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32) 182 >>> output = loss(logits, labels) 183 >>> print(output) 184 [[0. 1. 2.] 185 [0. 0. 1.]] 186 """ 187 def __init__(self, reduction='mean'): 188 """Initialize L1Loss.""" 189 super(L1Loss, self).__init__(reduction) 190 self.abs = P.Abs() 191 192 def construct(self, logits, labels): 193 _check_is_tensor('logits', logits, self.cls_name) 194 _check_is_tensor('labels', labels, self.cls_name) 195 x = self.abs(logits - labels) 196 return self.get_loss(x) 197 198 199class MSELoss(LossBase): 200 r""" 201 MSELoss creates a criterion to measure the mean squared error (squared L2-norm) between :math:`x` and :math:`y` 202 element-wise, where :math:`x` is the input and :math:`y` is the labels. 203 204 For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`, 205 the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as: 206 207 .. math:: 208 \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with} \quad l_n = (x_n - y_n)^2. 209 210 where :math:`N` is the batch size. If `reduction` is not 'none', then: 211 212 .. math:: 213 \ell(x, y) = 214 \begin{cases} 215 \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\ 216 \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.} 217 \end{cases} 218 219 Args: 220 reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none". 221 Default: "mean". 222 223 Inputs: 224 - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means, any number of 225 additional dimensions. 226 - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, same shape as the `logits` in common cases. 227 However, it supports the shape of `logits` is different from the shape of `labels` 228 and they should be broadcasted to each other. 229 230 Outputs: 231 Tensor, loss float tensor, the shape is zero if `reduction` is 'mean' or 'sum', 232 while the shape of output is the broadcasted shape if `reduction` is 'none'. 233 234 Raises: 235 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 236 237 Supported Platforms: 238 ``Ascend`` ``GPU`` ``CPU`` 239 240 Examples: 241 >>> # Case 1: logits.shape = labels.shape = (3,) 242 >>> loss = nn.MSELoss() 243 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 244 >>> labels = Tensor(np.array([1, 1, 1]), mindspore.float32) 245 >>> output = loss(logits, labels) 246 >>> print(output) 247 1.6666667 248 >>> # Case 2: logits.shape = (3,), labels.shape = (2, 3) 249 >>> loss = nn.MSELoss(reduction='none') 250 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 251 >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32) 252 >>> output = loss(logits, labels) 253 >>> print(output) 254 [[0. 1. 4.] 255 [0. 0. 1.]] 256 """ 257 def construct(self, logits, labels): 258 _check_is_tensor('logits', logits, self.cls_name) 259 _check_is_tensor('labels', labels, self.cls_name) 260 x = F.square(logits - labels) 261 return self.get_loss(x) 262 263 264class RMSELoss(LossBase): 265 r""" 266 RMSELoss creates a criterion to measure the root mean square error between :math:`x` and :math:`y` 267 element-wise, where :math:`x` is the input and :math:`y` is the labels. 268 269 For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`, 270 the loss of :math:`x` and :math:`y` is given as: 271 272 .. math:: 273 loss = \sqrt{\frac{1}{N}\sum_{i=1}^{N}{(x_i-y_i)^2}} 274 275 Inputs: 276 - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means, any number of 277 additional dimensions. 278 - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, same shape as the `logits` in common cases. 279 However, it supports the shape of `logits` is different from the shape of `labels` 280 and they should be broadcasted to each other. 281 282 Outputs: 283 Tensor, weighted loss float tensor and its shape is zero. 284 285 Supported Platforms: 286 ``Ascend`` ``GPU`` ``CPU`` 287 288 Examples: 289 >>> # Case 1: logits.shape = labels.shape = (3,) 290 >>> loss = nn.RMSELoss() 291 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 292 >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32) 293 >>> output = loss(logits, labels) 294 >>> print(output) 295 0.57735026 296 >>> # Case 2: logits.shape = (3,), labels.shape = (2, 3) 297 >>> loss = nn.RMSELoss() 298 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 299 >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32) 300 >>> output = loss(logits, labels) 301 >>> print(output) 302 1.0 303 """ 304 def __init__(self): 305 """Initialize RMSELoss.""" 306 super(RMSELoss, self).__init__() 307 self.MSELoss = MSELoss() 308 309 def construct(self, logits, label): 310 rmse_loss = F.sqrt(self.MSELoss(logits, label)) 311 312 return rmse_loss 313 314 315class MAELoss(LossBase): 316 r""" 317 MAELoss creates a criterion to measure the average absolute error between :math:`x` and :math:`y` 318 element-wise, where :math:`x` is the input and :math:`y` is the labels. 319 320 For simplicity, let :math:`x` and :math:`y` be 1-dimensional Tensor with length :math:`N`, 321 the unreduced loss (i.e. with argument reduction set to 'none') of :math:`x` and :math:`y` is given as: 322 323 .. math:: 324 \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad \text{with } l_n = \left| x_n - y_n \right|, 325 326 where :math:`N` is the batch size. If `reduction` is not 'none', then: 327 328 .. math:: 329 \ell(x, y) = 330 \begin{cases} 331 \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\ 332 \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} 333 \end{cases} 334 335 Args: 336 reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none". 337 Default: "mean". 338 339 Inputs: 340 - **logits** (Tensor) - Tensor of shape :math:`(M, *)` where :math:`*` means, any number of 341 additional dimensions. 342 - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, same shape as the `logits` in common cases. 343 However, it supports the shape of `logits` is different from the shape of `labels` 344 and they should be broadcasted to each other. 345 346 Outputs: 347 Tensor, weighted loss float tensor, the shape is zero if `reduction` is 'mean' or 'sum', 348 while the shape of output is the broadcasted shape if `reduction` is 'none'. 349 350 Raises: 351 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 352 353 Supported Platforms: 354 ``Ascend`` ``GPU`` ``CPU`` 355 356 Examples: 357 >>> # Case 1: logits.shape = labels.shape = (3,) 358 >>> loss = nn.MAELoss() 359 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 360 >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32) 361 >>> output = loss(logits, labels) 362 >>> print(output) 363 0.33333334 364 >>> # Case 2: logits.shape = (3,), labels.shape = (2, 3) 365 >>> loss = nn.MAELoss(reduction='none') 366 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 367 >>> labels = Tensor(np.array([[1, 1, 1], [1, 2, 2]]), mindspore.float32) 368 >>> output = loss(logits, labels) 369 >>> print(output) 370 [[0. 1. 2.] 371 [0. 0. 1.]] 372 """ 373 def __init__(self, reduction='mean'): 374 """Initialize MAELoss.""" 375 super(MAELoss, self).__init__(reduction) 376 self.abs = P.Abs() 377 378 def construct(self, logits, label): 379 _check_is_tensor('logits', logits, self.cls_name) 380 _check_is_tensor('labels', label, self.cls_name) 381 x = self.abs(logits - label) 382 return self.get_loss(x) 383 384 385class SmoothL1Loss(LossBase): 386 r""" 387 A loss class for learning region proposals. 388 389 SmoothL1Loss can be regarded as modified version of L1Loss or a combination of L1Loss and L2Loss. 390 L1Loss computes the element-wise absolute difference between two input tensors while L2Loss computes the 391 squared difference between two input tensors. L2Loss often leads to faster convergence but it is less 392 robust to outliers. 393 394 Given two input :math:`x,\ y` of length :math:`N`, the unreduced SmoothL1Loss can be described 395 as follows: 396 397 .. math:: 398 L_{i} = 399 \begin{cases} 400 \frac{0.5 (x_i - y_i)^{2}}{\text{beta}}, & \text{if } |x_i - y_i| < \text{beta} \\ 401 |x_i - y_i| - 0.5 \text{beta}, & \text{otherwise. } 402 \end{cases} 403 404 Here :math:`\text{beta}` controls the point where the loss function changes from quadratic to linear. 405 Its default value is 1.0. :math:`N` is the batch size. This function returns an 406 unreduced loss tensor. 407 408 Args: 409 beta (float): A parameter used to control the point where the function will change from 410 quadratic to linear. Default: 1.0. 411 412 Inputs: 413 - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means, any number of 414 additional dimensions. Data type must be float16 or float32. 415 - **labels** (Tensor) - Ground truth data, tensor of shape :math:`(N, *)`, 416 same shape and dtype as the `logits`. 417 418 Outputs: 419 Tensor, loss float tensor, same shape and dtype as the `logits`. 420 421 Raises: 422 TypeError: If `beta` is not a float. 423 TypeError: If dtype of `logits` or `labels` is neither float16 not float32. 424 ValueError: If `beta` is less than or equal to 0. 425 ValueError: If shape of `logits` is not the same as `labels`. 426 427 Supported Platforms: 428 ``Ascend`` ``GPU`` ``CPU`` 429 430 Examples: 431 >>> loss = nn.SmoothL1Loss() 432 >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32) 433 >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32) 434 >>> output = loss(logits, labels) 435 >>> print(output) 436 [0. 0. 0.5] 437 """ 438 def __init__(self, beta=1.0): 439 """Initialize SmoothL1Loss.""" 440 super(SmoothL1Loss, self).__init__() 441 self.beta = beta 442 self.smooth_l1_loss = P.SmoothL1Loss(self.beta) 443 444 def construct(self, logits, labels): 445 _check_is_tensor('logits', logits, self.cls_name) 446 _check_is_tensor('labels', labels, self.cls_name) 447 return self.smooth_l1_loss(logits, labels) 448 449 450class SoftMarginLoss(LossBase): 451 r""" 452 A loss class for two-class classification problems. 453 454 SoftMarginLoss creates a criterion that optimizes a two-class classification 455 logistic loss between input tensor :math:`x` and labels tensor :math:`y` 456 (containing 1 or -1). 457 458 .. math:: 459 \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()} 460 461 Args: 462 reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean". 463 464 Inputs: 465 - **logits** (Tensor) - Predict data. Data type must be float16 or float32. 466 - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`. 467 468 Outputs: 469 Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`. 470 Otherwise, a scalar value will be returned. 471 472 Raises: 473 TypeError: If `logits` or `labels` is not a Tensor. 474 TypeError: If dtype of `logits` or `labels` is neither float16 nor float32. 475 ValueError: If shape of `logits` is not the same as `labels`. 476 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 477 478 Supported Platforms: 479 ``Ascend`` 480 481 Examples: 482 >>> loss = ops.SoftMarginLoss() 483 >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32) 484 >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32) 485 >>> output = loss(logits, labels) 486 >>> print(output) 487 0.6764238 488 """ 489 def __init__(self, reduction='mean'): 490 super(SoftMarginLoss, self).__init__() 491 self.soft_margin_loss = P.SoftMarginLoss(reduction) 492 493 def construct(self, logits, labels): 494 return self.soft_margin_loss(logits, labels) 495 496 497class SoftmaxCrossEntropyWithLogits(LossBase): 498 r""" 499 Computes softmax cross entropy between logits and labels. 500 501 Measures the distribution error between the probabilities of the input (computed with softmax function) and the 502 labels where the classes are mutually exclusive (only one class is positive) using cross entropy loss. 503 504 Typical input into this function is unnormalized scores denoted as x whose shape is (N, C), 505 and the corresponding targets. 506 507 For each instance :math:`x_i`, i ranges from 0 to N-1, the loss is given as: 508 509 .. math:: 510 \ell(x_i, c) = - \log\left(\frac{\exp(x_i[c])}{\sum_j \exp(x_i[j])}\right) 511 = -x_i[c] + \log\left(\sum_j \exp(x_i[j])\right) 512 513 where :math:`x_i` is a 1D score Tensor, :math:`c` is the index of 1 in one-hot. 514 515 Note: 516 While the labels classes are mutually exclusive, i.e., only one class is positive in the labels, the predicted 517 probabilities does not need to be exclusive. It is only required that the predicted probability distribution 518 of entry is a valid one. 519 520 Args: 521 sparse (bool): Specifies whether labels use sparse format or not. Default: False. 522 reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none". 523 If "none", do not perform reduction. Default: "none". 524 525 Inputs: 526 - **logits** (Tensor) - Tensor of shape (N, C). Data type must be float16 or float32. 527 - **labels** (Tensor) - Tensor of shape (N, ). If `sparse` is True, The type of 528 `labels` is int32 or int64. Otherwise, the type of `labels` is the same as the type of `logits`. 529 530 Outputs: 531 Tensor, a tensor of the same shape and type as logits with the component-wise logistic losses. 532 533 Raises: 534 TypeError: If `sparse` is not a bool. 535 TypeError: If `sparse` is True and dtype of `labels` is neither int32 not int64. 536 TypeError: If `sparse` is False and dtype of `labels` is neither float16 not float32. 537 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 538 539 Supported Platforms: 540 ``Ascend`` ``GPU`` ``CPU`` 541 542 Examples: 543 >>> # case 1: sparse=True 544 >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True) 545 >>> logits = Tensor(np.array([[3, 5, 6, 9, 12, 33, 42, 12, 32, 72]]), mindspore.float32) 546 >>> labels_np = np.array([1]).astype(np.int32) 547 >>> labels = Tensor(labels_np) 548 >>> output = loss(logits, labels) 549 >>> print(output) 550 [67.] 551 >>> # case 2: sparse=False 552 >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=False) 553 >>> logits = Tensor(np.array([[3, 5, 6, 9, 12, 33, 42, 12, 32, 72]]), mindspore.float32) 554 >>> labels_np = np.array([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]]).astype(np.float32) 555 >>> labels = Tensor(labels_np) 556 >>> output = loss(logits, labels) 557 >>> print(output) 558 [30.] 559 """ 560 def __init__(self, 561 sparse=False, 562 reduction='none'): 563 """Initialize SoftmaxCrossEntropyWithLogits.""" 564 super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction) 565 self.sparse = validator.check_bool(sparse, "sparse") 566 self.reduction = reduction 567 self.softmax_cross_entropy = P.SoftmaxCrossEntropyWithLogits() 568 self.one_hot = P.OneHot() 569 self.on_value = Tensor(1.0, mstype.float32) 570 self.off_value = Tensor(0., mstype.float32) 571 self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"] 572 self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits() 573 574 def construct(self, logits, labels): 575 _check_is_tensor('logits', logits, self.cls_name) 576 _check_is_tensor('labels', labels, self.cls_name) 577 if self.sparse: 578 if self.reduction == 'mean': 579 x = self.sparse_softmax_cross_entropy(logits, labels) 580 return x 581 labels = self.one_hot(labels, F.shape(logits)[-1], self.on_value, self.off_value) 582 x = self.softmax_cross_entropy(logits, labels)[0] 583 return self.get_loss(x) 584 585 586@constexpr 587def _check_label_dtype(labels_dtype, cls_name): 588 """Internal function, used to check whether the data type of labels meets the requirements.""" 589 validator.check_type_name("labels", labels_dtype, [mstype.int32, mstype.int64], cls_name) 590 591 592class DiceLoss(LossBase): 593 r""" 594 The Dice coefficient is a set similarity loss. It is used to calculate the similarity between two samples. The 595 value of the Dice coefficient is 1 when the segmentation result is the best and is 0 when the segmentation result 596 is the worst. The Dice coefficient indicates the ratio of the area between two objects to the total area. 597 The function is shown as follows: 598 599 .. math:: 600 dice = 1 - \frac{2 * (pred \bigcap true)}{pred \bigcup true} 601 602 Args: 603 smooth (float): A term added to the denominator to improve numerical stability. Should be greater than 0. 604 Default: 1e-5. 605 606 Inputs: 607 - **logits** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means, any number of 608 additional dimensions. The data type must be float16 or float32. 609 - **labels** (Tensor) - Tensor of shape :math:`(N, *)`, same shape as the `logits`. 610 The data type must be float16 or float32. 611 612 Outputs: 613 Tensor, a tensor of shape with the per-example sampled Dice losses. 614 615 Raises: 616 ValueError: If the dimension of `logits` is different from `labels`. 617 TypeError: If the type of `logits` or `labels` is not a tensor. 618 619 Supported Platforms: 620 ``Ascend`` ``GPU`` ``CPU`` 621 622 Examples: 623 >>> loss = nn.DiceLoss(smooth=1e-5) 624 >>> logits = Tensor(np.array([[0.2, 0.5], [0.3, 0.1], [0.9, 0.6]]), mstype.float32) 625 >>> labels = Tensor(np.array([[0, 1], [1, 0], [0, 1]]), mstype.float32) 626 >>> output = loss(logits, labels) 627 >>> print(output) 628 0.38596618 629 """ 630 def __init__(self, smooth=1e-5): 631 """Initialize DiceLoss.""" 632 super(DiceLoss, self).__init__() 633 self.smooth = validator.check_positive_float(smooth, "smooth") 634 self.reshape = P.Reshape() 635 636 def construct(self, logits, label): 637 _check_is_tensor('logits', logits, self.cls_name) 638 _check_is_tensor('labels', label, self.cls_name) 639 _check_shape(logits.shape, label.shape, self.cls_name) 640 intersection = self.reduce_sum(self.mul(logits.view(-1), label.view(-1))) 641 unionset = self.reduce_sum(self.mul(logits.view(-1), logits.view(-1))) + \ 642 self.reduce_sum(self.mul(label.view(-1), label.view(-1))) 643 644 single_dice_coeff = (2 * intersection) / (unionset + self.smooth) 645 dice_loss = 1 - single_dice_coeff 646 647 return dice_loss 648 649 650@constexpr 651def _check_shape(logits_shape, label_shape, prim_name=None): 652 """Internal function, used to check whether the shape of logits and labels meets the requirements.""" 653 validator.check('logits_shape', logits_shape, 'label_shape', label_shape, prim_name=prim_name) 654 655 656@constexpr 657def _check_ndim_multi(logits_dim, label_dim, prim_name=None): 658 """Internal function, used to check whether the dimension of logits and label meets the requirements.""" 659 msg_prefix = f'For \'{prim_name}\', the' if prim_name else "The" 660 if logits_dim < 2: 661 raise ValueError(f"{msg_prefix} Logits dimension should be greater than 1, but got {logits_dim}.") 662 if label_dim < 2: 663 raise ValueError(f"{msg_prefix} Label dimension should be greater than 1, but got {label_dim}.") 664 665 666@constexpr 667def _check_weights(weight_shape, label_shape, prim_name=None): 668 """Internal function, used to check whether the reduced shape meets the requirements.""" 669 msg_prefix = f'For \'{prim_name}\', the' if prim_name else "The" 670 if weight_shape != label_shape: 671 raise ValueError(f"{msg_prefix} weight_shape[0] should be equal to label_shape[1], " 672 f"but got weight_shape[0]: {weight_shape} and label_shape[1]: {label_shape}.") 673 674 675class MultiClassDiceLoss(LossBase): 676 r""" 677 When there are multiple classifications, label is transformed into multiple binary classifications by one hot. 678 For each channel section in the channel, it can be regarded as a binary classification problem, so it can be 679 obtained through the binary loss of each category, and then the average value. 680 681 Args: 682 weights (Union[Tensor, None]): Tensor of shape :math:`(num\_classes, dim)`. The weight shape[0] should be 683 equal to labels shape[1]. 684 ignore_indiex (Union[int, None]): Class index to ignore. 685 activation (Union[str, Cell]): Activate function applied to the output of the fully connected layer, eg. 'ReLU'. 686 Default: 'softmax'. Choose from: ['softmax', 'logsoftmax', 'relu', 'relu6', 'tanh','Sigmoid'] 687 688 Inputs: 689 - **logits** (Tensor) - Tensor of shape :math:`(N, C, *)` where :math:`*` means, any number of additional 690 dimensions. The logits dimension should be greater than 1. The data type must be float16 or float32. 691 - **labels** (Tensor) - Tensor of shape :math:`(N, C, *)`, same shape as the `logits`. 692 The labels dimension should be greater than 1. The data type must be float16 or float32. 693 694 Outputs: 695 Tensor, a tensor of shape with the per-example sampled MultiClass Dice Losses. 696 697 Raises: 698 ValueError: If the shape of `logits` is different from `labels`. 699 TypeError: If the type of `logits` or `labels` is not a tensor. 700 ValueError: If the dimension of `logits` or `labels` is less than 2. 701 ValueError: If the weights.shape[0] is not equal to labels.shape[1]. 702 ValueError: If `weights` is a tensor, but its dimension is not 2. 703 704 Supported Platforms: 705 ``Ascend`` ``GPU`` ``CPU`` 706 707 Examples: 708 >>> loss = nn.MultiClassDiceLoss(weights=None, ignore_indiex=None, activation="softmax") 709 >>> logits = Tensor(np.array([[0.2, 0.5, 0.7], [0.3, 0.1, 0.5], [0.9, 0.6, 0.3]]), mstype.float32) 710 >>> labels = Tensor(np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]]), mstype.float32) 711 >>> output = loss(logits, labels) 712 >>> print(output) 713 0.54958105 714 """ 715 def __init__(self, weights=None, ignore_indiex=None, activation="softmax"): 716 """Initialize MultiClassDiceLoss.""" 717 super(MultiClassDiceLoss, self).__init__() 718 activation_list = ['softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'sigmoid'] 719 720 self.binarydiceloss = DiceLoss(smooth=1e-5) 721 self.weights = weights if weights is None else validator.check_value_type("weights", weights, [Tensor]) 722 if isinstance(self.weights, Tensor) and self.weights.ndim != 2: 723 raise ValueError(f"For '{self.cls_name}', the dimension of 'weights' should be 2, " 724 f"but got {self.weights.ndim}.") 725 self.ignore_indiex = ignore_indiex if ignore_indiex is None else \ 726 validator.check_value_type("ignore_indiex", ignore_indiex, [int]) 727 if isinstance(activation, str) and activation not in activation_list: 728 raise ValueError(f"For '{self.cls_name}', the 'activation' must be in {activation_list}, " 729 f"but got {activation}.") 730 731 self.activation = get_activation(activation) if isinstance(activation, str) else activation 732 if self.activation is not None and not isinstance(self.activation, Cell): 733 raise TypeError(f"For '{self.cls_name}', the 'activation' must be str or Cell, " 734 f"but got {type(self.activation)}.") 735 self.reshape = P.Reshape() 736 737 def construct(self, logits, label): 738 _check_is_tensor('logits', logits, self.cls_name) 739 _check_is_tensor('labels', label, self.cls_name) 740 _check_shape(logits.shape, label.shape, self.cls_name) 741 _check_ndim_multi(logits.ndim, label.ndim, self.cls_name) 742 total_loss = 0 743 744 if self.activation is not None: 745 logits = self.activation(logits) 746 747 for i in range(label.shape[1]): 748 if i != self.ignore_indiex: 749 dice_loss = self.binarydiceloss(logits[:, i], label[:, i]) 750 if self.weights is not None: 751 _check_weights(self.weights.shape[0], label.shape[1], self.cls_name) 752 dice_loss *= self.weights[i] 753 total_loss += dice_loss 754 755 return total_loss/label.shape[1] 756 757 758class SampledSoftmaxLoss(LossBase): 759 r""" 760 Computes the sampled softmax training loss. This operator can accelerate the training of the softmax classifier 761 over a large number of classes. It is generally an underestimate of the full softmax loss. 762 763 Args: 764 num_sampled (int): The number of classes to randomly sample per batch. 765 num_classes (int): The number of possible classes. 766 num_true (int): The number of labels classes per training example. Default: 1. 767 sampled_values (Union[list, tuple]): List or tuple of (`sampled_candidates`, `true_expected_count`, 768 `sampled_expected_count`) returned by a `*CandidateSampler` function. 769 Default to None, `UniformCandidateSampler` is applied. 770 remove_accidental_hits (bool): Whether to remove "accidental hits" 771 where a sampled class equals to one of the labels classes. Default: True. 772 seed (int): Random seed for candidate sampling. Default: 0 773 reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none". 774 If "none", do not perform reduction. Default: "none". 775 776 Inputs: 777 - **weights** (Tensor) - Tensor of shape :math:`(C, dim)`. 778 - **bias** (Tensor) - Tensor of shape :math:`(C,)`. The class biases. 779 - **labels** (Tensor) - Tensor of shape :math:`(N, num\_true)`, type `int64, int32`. The labels classes. 780 - **logits** (Tensor) - Tensor of shape :math:`(N, dim)`. The forward activations of the input network. 781 782 Outputs: 783 Tensor or Scalar, if `reduction` is 'none', then output is a tensor with shape :math:`(N,)`. 784 Otherwise, the output is a scalar. 785 786 Raises: 787 TypeError: If `sampled_values` is not a list or tuple. 788 TypeError: If dtype of `labels` is neither int32 not int64. 789 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 790 ValueError: If `num_sampled` or `num_true` is greater than `num_classes`. 791 ValueError: If length of `sampled_values` is not equal to 3. 792 793 Supported Platforms: 794 ``GPU`` 795 796 Examples: 797 >>> mindspore.set_seed(1) 798 >>> loss = nn.SampledSoftmaxLoss(num_sampled=4, num_classes=7, num_true=1) 799 >>> weights = Tensor(np.random.randint(0, 9, [7, 10]), mindspore.float32) 800 >>> biases = Tensor(np.random.randint(0, 9, [7]), mindspore.float32) 801 >>> labels = Tensor([0, 1, 2]) 802 >>> logits = Tensor(np.random.randint(0, 9, [3, 10]), mindspore.float32) 803 >>> output = loss(weights, biases, labels, logits) 804 >>> print(output) 805 [4.6051701e+01 1.4000047e+01 6.1989022e-06] 806 """ 807 808 def __init__(self, num_sampled, num_classes, num_true=1, 809 sampled_values=None, remove_accidental_hits=True, seed=0, 810 reduction='none'): 811 """Initialize SampledSoftmaxLoss.""" 812 super(SampledSoftmaxLoss, self).__init__(reduction) 813 814 if num_true < 1: 815 raise ValueError(f"For '{self.cls_name}', the 'num_true' must be greater than or equal to 1, " 816 f"but got {num_true}.") 817 if seed < 0: 818 raise ValueError(f"For '{self.cls_name}', the 'seed' must be greater than or equal to 0, but got {seed}.") 819 if num_sampled > num_classes: 820 raise ValueError(f"For '{self.cls_name}', the 'num_sampled' must be smaller than or " 821 f"equal to 'num_classes', but got 'num_sampled': {num_sampled} " 822 f"and 'num_classes': {num_classes}.") 823 if num_true > num_classes: 824 raise ValueError(f"For '{self.cls_name}', the 'num_true' must be smaller than or equal to 'num_classes', " 825 f"but got 'num_true': {num_true} amd 'num_classes': {num_classes}.") 826 if sampled_values is not None: 827 if not isinstance(sampled_values, (list, tuple)): 828 raise TypeError(f"For '{self.cls_name}', the type of 'sampled_values' must be a list or tuple, " 829 f"but got {type(sampled_values).__name__}.") 830 if len(sampled_values) != 3: 831 raise ValueError(f"For '{self.cls_name}', the length of 'sampled_values' must be equal to 3," 832 f"but got {len(sampled_values)}.") 833 834 self.num_sampled = num_sampled 835 self.num_classes = num_classes 836 self.num_true = num_true 837 self.sampled_values = sampled_values 838 self.remove_accidental_hits = remove_accidental_hits 839 self.seed = seed 840 self.sampler = P.UniformCandidateSampler( 841 num_true, 842 num_sampled, 843 True, 844 num_classes, 845 seed, 846 remove_accidental_hits) 847 self.cast = P.Cast() 848 self.reshape = P.Reshape() 849 self.shape = P.Shape() 850 self.exp = P.Exp() 851 self.log = P.Log() 852 self.slice_op = P.Slice() 853 self.matmul = P.MatMul(False, True) 854 self.gather_v2 = P.Gather() 855 self.reduce_max_true = P.ReduceMax(True) 856 self.reduce_sum = P.ReduceSum() 857 self.reduce_sum_true = P.ReduceSum(True) 858 self.concat_dim0 = P.Concat(0) 859 self.concat_dim1 = P.Concat(1) 860 self.ones_like = P.OnesLike() 861 self.zeros_like = P.ZerosLike() 862 self.mul = P.Mul() 863 self.expand_dims = P.ExpandDims() 864 self.dtype = P.DType() 865 866 def construct(self, weights, biases, labels, logits): 867 _check_is_tensor('weights', weights, self.cls_name) 868 _check_is_tensor('biases', biases, self.cls_name) 869 _check_is_tensor('labels', labels, self.cls_name) 870 _check_is_tensor('logits', logits, self.cls_name) 871 _check_label_dtype(self.dtype(labels), self.cls_name) 872 873 logits, labels = self._compute_sampled_logits( 874 weights=weights, 875 biases=biases, 876 labels=labels, 877 logits=logits, 878 num_true=self.num_true, 879 sampled_values=self.sampled_values, 880 subtract_log_q=True) 881 882 x = self._softmax_cross_entropy(logits, labels) 883 return x 884 885 def _softmax_cross_entropy(self, logits, targets): 886 stable_exp_logits = self.exp(logits - self.reduce_max_true(logits, 1)) 887 pred = stable_exp_logits / self.reduce_sum_true(stable_exp_logits, 1) 888 return -self.reduce_sum(targets * self.log(pred + 1.0e-20), 1) 889 890 def _compute_sampled_logits(self, weights, 891 biases, 892 labels, 893 logits, 894 num_true=1, 895 sampled_values=None, 896 subtract_log_q=True): 897 """Helper function for SampledSoftmaxLoss functions. 898 899 Computes sampled output training logits and labels suitable 900 901 Note: In the case where num_true > 1, we assign to each labels class 902 with the labels probability (1/num_true) so that the labels probabilities 903 sum to 1 per-example. 904 905 Args: 906 weights (Tensor): Tensor of shape `[num_classes, dim]`. 907 biases (Tensor): Tensor of shape `[num_classes]`. 908 labels (Tensor): Tensor of shape `[batch_size, num_true]`. The labels classes. 909 logits (Tensor): Tensor of shape `[batch_size, dim]`. The forward 910 activations of the input network. 911 num_true (int): The number of labels classes per training example. 912 sampled_values: A tuple of (`sampled_candidates`, `true_expected_count`, 913 `sampled_expected_count`) returned by a `UniformCandidateSampler` function. 914 subtract_log_q: A `bool`. whether to subtract the log expected count of 915 the labels in the sample to get the logits of the true labels. Default: True. 916 Returns: 917 out_logits: `Tensor` object with shape 918 `[batch_size, num_true + num_sampled]` 919 out_labels: A tensor object with the same shape as `out_logits`. 920 """ 921 922 if not labels.dtype == mstype.int32: 923 labels = self.cast(labels, mstype.int32) 924 labels = self.reshape(labels, (-1, num_true)) 925 labels_flat = self.reshape(labels, (-1,)) 926 927 # Sample the negative labels. 928 # sampled shape: [num_sampled] tensor 929 # true_expected_count shape is [batch_size, 1] tensor 930 # sampled_expected_count shape is [num_sampled] tensor 931 if sampled_values is None: 932 sampled_values = self.sampler(labels) 933 934 (sampled, true_expected_count, sampled_expected_count) = sampled_values 935 936 if not sampled.dtype == mstype.int32: 937 sampled = self.cast(sampled, mstype.int32) 938 all_ids = self.concat_dim0((labels_flat, sampled)) 939 all_w = self.gather_v2(weights, all_ids, 0) 940 941 n_true = self.shape(labels_flat)[0] 942 n_sampled = self.shape(sampled)[0] 943 n_dim = self.shape(all_w)[1] 944 945 true_w = self.slice_op(all_w, [0, 0], [n_true, n_dim]) 946 sampled_w = self.slice_op(all_w, [n_true, 0], [n_sampled, n_dim]) 947 sampled_logits = self.matmul(logits, sampled_w) 948 949 all_b = self.gather_v2(biases, all_ids, 0) 950 true_b = self.slice_op(all_b, [0], [n_true]) 951 sampled_b = self.slice_op(all_b, [n_true], [n_sampled]) 952 953 new_true_w_shape = (-1, num_true, n_dim) 954 row_wise_dots = self.mul(self.expand_dims(logits, 1), 955 self.reshape(true_w, new_true_w_shape)) 956 957 # We want the row-wise dot plus biases which yields a 958 # [batch_size, num_true] tensor of true_logits. 959 dots_as_matrix = self.reshape(row_wise_dots, (-1, n_dim)) 960 true_logits = self.reshape(self.reduce_sum(dots_as_matrix, 1), (-1, num_true)) 961 true_b = self.reshape(true_b, (-1, num_true)) 962 true_logits += true_b 963 sampled_logits += sampled_b 964 965 if subtract_log_q: 966 # Subtract log of Q(l), prior probability that l appears in sampled. 967 true_logits -= self.log(true_expected_count) 968 sampled_logits -= self.log(sampled_expected_count) 969 970 # Construct output logits and labels. The true labels/logits start at col 0. 971 out_logits = self.concat_dim1((true_logits, sampled_logits)) 972 973 # true_logits is a float tensor, ones_like(true_logits) is a float 974 # tensor of ones. We then divide by num_true to ensure the per-example 975 # labels sum to 1.0, i.e. form a proper probability distribution. 976 out_labels = self.concat_dim1(( 977 self.ones_like(true_logits) / num_true, 978 self.zeros_like(sampled_logits) 979 )) 980 return out_logits, out_labels 981 982 983class BCELoss(LossBase): 984 r""" 985 BCELoss creates a criterion to measure the binary cross entropy between the true labels and predicted labels. 986 987 Set the predicted labels as :math:`x`, true labels as :math:`y`, the output loss as :math:`\ell(x, y)`. 988 Let, 989 990 .. math:: 991 L = \{l_1,\dots,l_N\}^\top, \quad 992 l_n = - w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right] 993 994 Then, 995 996 .. math:: 997 \ell(x, y) = \begin{cases} 998 L, & \text{if reduction} = \text{'none';}\\ 999 \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\ 1000 \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} 1001 \end{cases} 1002 1003 Note: 1004 Note that the predicted labels should always be the output of sigmoid and the true labels should be numbers 1005 between 0 and 1. 1006 1007 Args: 1008 weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. 1009 And it must have the same shape and data type as `inputs`. Default: None 1010 reduction (str): Specifies the reduction to be applied to the output. 1011 Its value must be one of 'none', 'mean', 'sum'. Default: 'none'. 1012 1013 Inputs: 1014 - **logits** (Tensor) - The input tensor with shape :math:`(N, *)` where :math:`*` means, any number 1015 of additional dimensions. The data type must be float16 or float32. 1016 - **labels** (Tensor) - The label tensor with shape :math:`(N, *)`, the same shape and data type as `logits`. 1017 1018 Outputs: 1019 Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `logits`. 1020 Otherwise, the output is a scalar. 1021 1022 Raises: 1023 TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 not float32. 1024 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 1025 ValueError: If shape of `logits` is not the same as `labels` or `weight` (if given). 1026 1027 Supported Platforms: 1028 ``Ascend`` ``GPU`` ``CPU`` 1029 1030 Examples: 1031 >>> weight = Tensor(np.array([[1.0, 2.0, 3.0], [4.0, 3.3, 2.2]]), mindspore.float32) 1032 >>> loss = nn.BCELoss(weight=weight, reduction='mean') 1033 >>> logits = Tensor(np.array([[0.1, 0.2, 0.3], [0.5, 0.7, 0.9]]), mindspore.float32) 1034 >>> labels = Tensor(np.array([[0, 1, 0], [0, 0, 1]]), mindspore.float32) 1035 >>> output = loss(logits, labels) 1036 >>> print(output) 1037 1.8952923 1038 """ 1039 1040 def __init__(self, weight=None, reduction='none'): 1041 """Initialize BCELoss.""" 1042 super(BCELoss, self).__init__() 1043 self.binary_cross_entropy = P.BinaryCrossEntropy(reduction=reduction) 1044 self.weight_one = weight is None 1045 if not self.weight_one: 1046 self.weight = weight 1047 else: 1048 self.ones = P.OnesLike() 1049 1050 def construct(self, logits, labels): 1051 _check_is_tensor('logits', logits, self.cls_name) 1052 _check_is_tensor('labels', labels, self.cls_name) 1053 if self.weight_one: 1054 weight = self.ones(logits) 1055 else: 1056 weight = self.weight 1057 loss = self.binary_cross_entropy(logits, labels, weight) 1058 return loss 1059 1060 1061@constexpr 1062def _check_reduced_shape_valid(ori_shape, reduced_shape, axis, cls_name): 1063 """Internal function, used to check whether the reduced shape meets the requirements.""" 1064 validator.check_reduce_shape(ori_shape, reduced_shape, axis, cls_name) 1065 1066 1067class CosineEmbeddingLoss(LossBase): 1068 r""" 1069 CosineEmbeddingLoss creates a criterion to measure the similarity between two tensors using cosine distance. 1070 1071 Given two tensors :math:`x1`, :math:`x2`, and a Tensor label :math:`y` with values 1 or -1: 1072 1073 .. math:: 1074 loss(x_1, x_2, y) = \begin{cases} 1075 1-cos(x_1, x_2), & \text{if } y = 1\\ 1076 max(0, cos(x_1, x_2)-margin), & \text{if } y = -1\\ 1077 \end{cases} 1078 1079 Args: 1080 margin (float): Should be in [-1.0, 1.0]. Default 0.0. 1081 reduction (str): Specifies which reduction to be applied to the output. It must be one of 1082 "none", "mean", and "sum", meaning no reduction, reduce mean and sum on output, respectively. Default "mean". 1083 1084 Inputs: 1085 - **logits_x1** (Tensor) - Tensor of shape :math:`(N, *)` where :math:`*` means, any number 1086 of additional dimensions. 1087 - **logits_x2** (Tensor) - Tensor of shape :math:`(N, *)`, same shape and dtype as `logits_x1`. 1088 - **labels** (Tensor) - Contains value 1 or -1. Suppose the shape of `logits_x1` is 1089 :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `labels` must be :math:`(x_1, x_3, x_4, ..., x_R)`. 1090 1091 Outputs: 1092 Tensor or Scalar, if `reduction` is "none", its shape is the same as `labels`. 1093 Otherwise, a scalar value will be returned. 1094 1095 Raises: 1096 TypeError: If `margin` is not a float. 1097 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 1098 ValueError: If `margin` is not in range [-1, 1]. 1099 1100 Supported Platforms: 1101 ``Ascend`` ``GPU`` ``CPU`` 1102 1103 Examples: 1104 >>> logits_x1 = Tensor(np.array([[0.3, 0.8], [0.4, 0.3]]), mindspore.float32) 1105 >>> logits_x2 = Tensor(np.array([[0.4, 1.2], [-0.4, -0.9]]), mindspore.float32) 1106 >>> labels = Tensor(np.array([1, -1]), mindspore.int32) 1107 >>> cosine_embedding_loss = nn.CosineEmbeddingLoss() 1108 >>> output = cosine_embedding_loss(logits_x1, logits_x2, labels) 1109 >>> print(output) 1110 0.0003425479 1111 """ 1112 def __init__(self, margin=0.0, reduction="mean"): 1113 """Initialize CosineEmbeddingLoss.""" 1114 super(CosineEmbeddingLoss, self).__init__(reduction) 1115 self.reduce_sum = P.ReduceSum() 1116 self.maximum = P.Maximum() 1117 validator.check_value_type("margin", margin, [float], self.cls_name) 1118 self.margin = validator.check_float_range(margin, -1.0, 1.0, Rel.INC_BOTH, "margin", self.cls_name) 1119 1120 def construct(self, logits_x1, logits_x2, labels): 1121 _check_is_tensor('logits_x1', logits_x1, self.cls_name) 1122 _check_is_tensor('logits_x2', logits_x2, self.cls_name) 1123 _check_is_tensor('labels', labels, self.cls_name) 1124 F.same_type_shape(logits_x1, logits_x2) 1125 _check_reduced_shape_valid(F.shape(logits_x1), F.shape(labels), (1,), self.cls_name) 1126 # if labels > 0, 1-cosine(logits_x1, logits_x2) 1127 # else, max(0, cosine(logits_x1, logits_x2)-margin) 1128 prod_sum = self.reduce_sum(logits_x1 * logits_x2, (1,)) 1129 square1 = self.reduce_sum(F.square(logits_x1), (1,)) 1130 square2 = self.reduce_sum(F.square(logits_x2), (1,)) 1131 denom = F.sqrt(square1) * F.sqrt(square2) 1132 cosine = prod_sum / denom 1133 1134 pos_value = 1.0 - cosine 1135 neg_value = self.maximum(cosine - self.margin, 0.0) 1136 zeros = F.zeros_like(cosine) 1137 pos_part = F.select(labels == 1, pos_value, zeros) 1138 neg_part = F.select(labels == -1, neg_value, zeros) 1139 output_unreduced = pos_part + neg_part 1140 1141 return self.get_loss(output_unreduced) 1142 1143 1144class BCEWithLogitsLoss(LossBase): 1145 r""" 1146 Adds sigmoid activation function to input logits, and uses the given logits to compute binary cross entropy 1147 between the logits and the labels. 1148 1149 Sets input `logits` as :math:`X`, input `labels` as :math:`Y`, output as :math:`L`. Then, 1150 1151 .. math:: 1152 p_{ij} = sigmoid(X_{ij}) = \frac{1}{1 + e^{-X_{ij}}} 1153 1154 .. math:: 1155 L_{ij} = -[Y_{ij} \cdot log(p_{ij}) + (1 - Y_{ij}) \cdot log(1 - p_{ij})] 1156 1157 Then, 1158 1159 .. math:: 1160 \ell(x, y) = \begin{cases} 1161 L, & \text{if reduction} = \text{'none';}\\ 1162 \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\ 1163 \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} 1164 \end{cases} 1165 1166 Args: 1167 reduction (str): Type of reduction to be applied to loss. The optional values are 'mean', 'sum', and 'none'. 1168 If 'none', do not perform reduction. Default:'mean'. 1169 weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. 1170 If not None, it can be broadcast to a tensor with shape of `logits`, 1171 data type must be float16 or float32. Default: None. 1172 pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the 1173 number of classes. If not None, it must can be broadcast to a tensor with shape of `logits`, 1174 data type must be float16 or float32. Default: None. 1175 1176 Inputs: 1177 - **logits** (Tensor) - Input logits with shape :math:`(N, *)` where :math:`*` means, any number 1178 of additional dimensions. The data type must be float16 or float32. 1179 - **labels** (Tensor) - Ground truth label with shape :math:`(N, *)`, same shape and dtype as `logits`. 1180 1181 Outputs: 1182 Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`. 1183 Otherwise, a scalar value will be returned. 1184 1185 Raises: 1186 TypeError: If data type of `logits` or `labels` is neither float16 nor float32. 1187 TypeError: If `weight` or `pos_weight` is a parameter. 1188 TypeError: If data type of `weight` or `pos_weight` is neither float16 nor float32. 1189 ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`. 1190 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 1191 1192 Supported Platforms: 1193 ``Ascend`` ``GPU`` 1194 1195 Examples: 1196 >>> logits = Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]).astype(np.float32)) 1197 >>> labels = Tensor(np.array([[0.3, 0.8, 1.2], [-0.6, 0.1, 2.2]]).astype(np.float32)) 1198 >>> loss = nn.BCEWithLogitsLoss() 1199 >>> output = loss(logits, labels) 1200 >>> print(output) 1201 0.3463612 1202 """ 1203 1204 def __init__(self, reduction='mean', weight=None, pos_weight=None): 1205 """Initialize BCEWithLogitsLoss.""" 1206 super(BCEWithLogitsLoss, self).__init__() 1207 self.bce_with_logits_loss = P.BCEWithLogitsLoss(reduction=reduction) 1208 if isinstance(weight, Parameter): 1209 raise TypeError(f"For '{self.cls_name}', the 'weight' can not be a Parameter.") 1210 if isinstance(pos_weight, Parameter): 1211 raise TypeError(f"For '{self.cls_name}', the 'pos_weight' can not be a Parameter.") 1212 self.weight = weight 1213 self.pos_weight = pos_weight 1214 self.ones = P.OnesLike() 1215 1216 def construct(self, logits, labels): 1217 _check_is_tensor('logits', logits, self.cls_name) 1218 _check_is_tensor('labels', labels, self.cls_name) 1219 ones_input = self.ones(logits) 1220 if self.weight is not None: 1221 weight = self.weight 1222 else: 1223 weight = ones_input 1224 if self.pos_weight is not None: 1225 pos_weight = self.pos_weight 1226 else: 1227 pos_weight = ones_input 1228 loss = self.bce_with_logits_loss(logits, labels, weight, pos_weight) 1229 return loss 1230 1231 1232@constexpr 1233def _check_ndim(logits_nidm, labels_ndim, prime_name=None): 1234 '''Internal function, used to check whether the dimension of logits and labels meets the requirements.''' 1235 msg_prefix = f'For \'{prime_name}\', the' if prime_name else "The" 1236 if logits_nidm < 2 or logits_nidm > 4: 1237 raise ValueError(f"{msg_prefix} dimensions of 'logits' should be in [2, 4], but got" 1238 f"dimension of 'logits' {logits_nidm}.") 1239 if labels_ndim < 2 or labels_ndim > 4: 1240 raise ValueError(f"{msg_prefix} dimensions of 'labels' should be in [2, 4], but got" 1241 f"dimension of 'labels' {labels_ndim}.") 1242 if logits_nidm != labels_ndim: 1243 raise ValueError(f"{msg_prefix} dimensions of 'logits' and 'labels' must be equal, but got" 1244 f"dimension of 'logits' {logits_nidm} and dimension of 'labels' {labels_ndim}.") 1245 1246 1247@constexpr 1248def _check_channel_and_shape(logits, labels, prime_name=None): 1249 '''Internal function, used to check whether the channels or shape of logits and labels meets the requirements.''' 1250 msg_prefix = f'For \'{prime_name}\', the' if prime_name else "The" 1251 if logits == 1: 1252 raise ValueError(f"{msg_prefix} single channel prediction is not supported, but got {logits}.") 1253 if labels not in (1, logits): 1254 raise ValueError(f"{msg_prefix} channel of 'labels' must be one or the 'labels' must be the same as that of " 1255 f"the 'logits'. If there is only one channel, its value should be in the range [0, C-1], " 1256 f"where C is the number of classes " 1257 f"inferred from 'logits': C={logits}, but got 'labels': {labels}.") 1258 1259 1260@constexpr 1261def _check_input_dtype(labels_dtype, cls_name): 1262 """Internal function, used to check whether the data type of labels meets the requirements.""" 1263 validator.check_type_name("labels", labels_dtype, 1264 [mstype.int32, mstype.int64, mstype.float16, mstype.float32], cls_name) 1265 1266 1267class FocalLoss(LossBase): 1268 r""" 1269 The loss function proposed by Kaiming team in their paper ``Focal Loss for Dense Object Detection`` improves the 1270 effect of image object detection. It is a loss function to solve the imbalance of categories and the difference of 1271 classification difficulty. If you want to learn more, please refer to the paper. 1272 `Focal Loss for Dense Object Detection <https://arxiv.org/pdf/1708.02002.pdf>`_. The function is shown as follows: 1273 1274 .. math:: 1275 FL(p_t) = -(1-p_t)^\gamma log(p_t) 1276 1277 Args: 1278 gamma (float): Gamma is used to adjust the steepness of weight curve in focal loss. Default: 2.0. 1279 weight (Union[Tensor, None]): A rescaling weight applied to the loss of each batch element. The dimension of 1280 weight should be 1. If None, no weight is applied. Default: None. 1281 reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none". 1282 If "none", do not perform reduction. Default: "mean". 1283 1284 Inputs: 1285 - **logits** (Tensor) - Tensor of shape should be :math:`(B, C)` or :math:`(B, C, H)` or :math:`(B, C, H, W)`. 1286 Where :math:`C` is the number of classes. Its value is greater than 1. If the shape is :math:`(B, C, H, W)` 1287 or :math:`(B, C, H)`, the :math:`H` or product of :math:`H` and :math:`W` should be the same as labels. 1288 - **labels** (Tensor) - Tensor of shape should be :math:`(B, C)` or :math:`(B, C, H)` or :math:`(B, C, H, W)`. 1289 The value of :math:`C` is 1 or it needs to be the same as predict's :math:`C`. If :math:`C` is not 1, 1290 the shape of target should be the same as that of predict, where :math:`C` is the number of classes. 1291 If the shape is :math:`(B, C, H, W)` or :math:`(B, C, H)`, the :math:`H` or product of :math:`H` 1292 and :math:`W` should be the same as logits. 1293 1294 Outputs: 1295 Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`. 1296 Otherwise, a scalar value will be returned. 1297 1298 Raises: 1299 TypeError: If the data type of `gamma` is not a float. 1300 TypeError: If `weight` is not a Tensor. 1301 ValueError: If `labels` dim is different from `logits`. 1302 ValueError: If `labels` channel is not 1 and `labels` shape is different from `logits`. 1303 ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 1304 1305 Supported Platforms: 1306 ``Ascend`` 1307 1308 Example: 1309 >>> logits = Tensor([[0.8, 1.4], [0.5, 0.9], [1.2, 0.9]], mstype.float32) 1310 >>> labels = Tensor([[1], [1], [0]], mstype.int32) 1311 >>> focalloss = nn.FocalLoss(weight=Tensor([1, 2]), gamma=2.0, reduction='mean') 1312 >>> output = focalloss(logits, labels) 1313 >>> print(output) 1314 0.12516622 1315 """ 1316 1317 def __init__(self, weight=None, gamma=2.0, reduction='mean'): 1318 """Initialize FocalLoss.""" 1319 super(FocalLoss, self).__init__(reduction=reduction) 1320 1321 self.gamma = validator.check_value_type("gamma", gamma, [float]) 1322 if weight is not None and not isinstance(weight, Tensor): 1323 raise TypeError(f"For '{self.cls_name}', the type of 'weight' should be a Tensor, " 1324 f"but got {type(weight).__name__}.") 1325 if isinstance(weight, Tensor) and weight.ndim != 1: 1326 raise ValueError(f"For '{self.cls_name}', the dimension of 'weight' should be 1, but got {weight.ndim}.") 1327 self.weight = weight 1328 self.expand_dims = P.ExpandDims() 1329 self.gather_d = P.GatherD() 1330 self.squeeze = P.Squeeze(axis=1) 1331 self.tile = P.Tile() 1332 self.cast = P.Cast() 1333 self.dtype = P.DType() 1334 self.logsoftmax = nn.LogSoftmax(1) 1335 1336 def construct(self, logits, labels): 1337 _check_is_tensor('logits', logits, self.cls_name) 1338 _check_is_tensor('labels', labels, self.cls_name) 1339 labelss = labels 1340 _check_ndim(logits.ndim, labelss.ndim) 1341 _check_channel_and_shape(logits.shape[1], labelss.shape[1]) 1342 _check_input_dtype(self.dtype(labelss), self.cls_name) 1343 1344 if logits.ndim > 2: 1345 logits = logits.view(logits.shape[0], logits.shape[1], -1) 1346 labelss = labelss.view(labelss.shape[0], labelss.shape[1], -1) 1347 else: 1348 logits = self.expand_dims(logits, 2) 1349 labelss = self.expand_dims(labelss, 2) 1350 1351 log_probability = self.logsoftmax(logits) 1352 1353 if labels.shape[1] == 1: 1354 log_probability = self.gather_d(log_probability, 1, self.cast(labelss, mindspore.int32)) 1355 log_probability = self.squeeze(log_probability) 1356 1357 probability = F.exp(log_probability) 1358 1359 if self.weight is not None: 1360 convert_weight = self.weight[None, :, None] 1361 convert_weight = self.tile(convert_weight, (labelss.shape[0], 1, labelss.shape[2])) 1362 if labels.shape[1] == 1: 1363 convert_weight = self.gather_d(convert_weight, 1, self.cast(labelss, mindspore.int32)) 1364 convert_weight = self.squeeze(convert_weight) 1365 log_probability = log_probability * convert_weight 1366 1367 weight = F.pows(-1 * probability + 1.0, self.gamma) 1368 if labels.shape[1] == 1: 1369 loss = (-1 * weight * log_probability).mean(axis=1) 1370 else: 1371 loss = (-1 * weight * labelss * log_probability).mean(axis=-1) 1372 1373 return self.get_loss(loss) 1374