1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Built-in loss functions. 16""" 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import abc 22 23import six 24 25from tensorflow.python.framework import ops 26from tensorflow.python.framework import smart_cond 27from tensorflow.python.keras import backend as K 28from tensorflow.python.keras.utils import losses_utils 29from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object 30from tensorflow.python.keras.utils.generic_utils import serialize_keras_object 31from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable 32from tensorflow.python.ops import array_ops 33from tensorflow.python.ops import math_ops 34from tensorflow.python.ops import nn 35from tensorflow.python.ops.losses import losses_impl 36from tensorflow.python.util.tf_export import keras_export 37from tensorflow.tools.docs import doc_controls 38 39 40@keras_export('keras.losses.Loss') 41class Loss(object): 42 """Loss base class. 43 44 To be implemented by subclasses: 45 * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`. 46 47 Example subclass implementation: 48 ``` 49 class MeanSquaredError(Loss): 50 def call(self, y_true, y_pred): 51 y_pred = ops.convert_to_tensor(y_pred) 52 y_true = math_ops.cast(y_true, y_pred.dtype) 53 return K.mean(math_ops.square(y_pred - y_true), axis=-1) 54 ``` 55 56 Args: 57 reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. 58 Default value is `SUM_OVER_BATCH_SIZE`. 59 name: Optional name for the op. 60 """ 61 62 def __init__(self, 63 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 64 name=None): 65 self.reduction = reduction 66 self.name = name 67 68 def __call__(self, y_true, y_pred, sample_weight=None): 69 """Invokes the `Loss` instance. 70 71 Args: 72 y_true: Ground truth values. 73 y_pred: The predicted values. 74 sample_weight: Optional `Tensor` whose rank is either 0, or the same rank 75 as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a 76 coefficient for the loss. If a scalar is provided, then the loss is 77 simply scaled by the given value. If `sample_weight` is a tensor of size 78 `[batch_size]`, then the total loss for each sample of the batch is 79 rescaled by the corresponding element in the `sample_weight` vector. If 80 the shape of `sample_weight` matches the shape of `y_pred`, then the 81 loss of each measurable element of `y_pred` is scaled by the 82 corresponding value of `sample_weight`. 83 84 Returns: 85 Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same 86 shape as `y_true`; otherwise, it is scalar. 87 88 Raises: 89 ValueError: If the shape of `sample_weight` is invalid. 90 """ 91 # If we are wrapping a lambda function strip '<>' from the name as it is not 92 # accepted in scope name. 93 scope_name = 'lambda' if self.name == '<lambda>' else self.name 94 with ops.name_scope(scope_name, format(self.__class__.__name__), 95 (y_pred, y_true, sample_weight)): 96 losses = self.call(y_true, y_pred) 97 return losses_utils.compute_weighted_loss( 98 losses, sample_weight, reduction=self.reduction) 99 100 @classmethod 101 def from_config(cls, config): 102 """Instantiates a `Loss` from its config (output of `get_config()`). 103 104 Args: 105 config: Output of `get_config()`. 106 107 Returns: 108 A `Loss` instance. 109 """ 110 return cls(**config) 111 112 def get_config(self): 113 return {'reduction': self.reduction, 'name': self.name} 114 115 @abc.abstractmethod 116 @doc_controls.for_subclass_implementers 117 def call(self, y_true, y_pred): 118 """Invokes the `Loss` instance. 119 120 Args: 121 y_true: Ground truth values, with the same shape as 'y_pred'. 122 y_pred: The predicted values. 123 """ 124 NotImplementedError('Must be implemented in subclasses.') 125 126 127class LossFunctionWrapper(Loss): 128 """Wraps a loss function in the `Loss` class. 129 130 Args: 131 fn: The loss function to wrap, with signature `fn(y_true, y_pred, 132 **kwargs)`. 133 reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. 134 Default value is `SUM_OVER_BATCH_SIZE`. 135 name: (Optional) name for the loss. 136 **kwargs: The keyword arguments that are passed on to `fn`. 137 """ 138 139 def __init__(self, 140 fn, 141 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 142 name=None, 143 **kwargs): 144 super(LossFunctionWrapper, self).__init__(reduction=reduction, name=name) 145 self.fn = fn 146 self._fn_kwargs = kwargs 147 148 def call(self, y_true, y_pred): 149 """Invokes the `LossFunctionWrapper` instance. 150 151 Args: 152 y_true: Ground truth values. 153 y_pred: The predicted values. 154 155 Returns: 156 Loss values per sample. 157 """ 158 return self.fn(y_true, y_pred, **self._fn_kwargs) 159 160 def get_config(self): 161 config = {} 162 for k, v in six.iteritems(self._fn_kwargs): 163 config[k] = K.eval(v) if is_tensor_or_variable(v) else v 164 base_config = super(LossFunctionWrapper, self).get_config() 165 return dict(list(base_config.items()) + list(config.items())) 166 167 168@keras_export('keras.losses.MeanSquaredError') 169class MeanSquaredError(LossFunctionWrapper): 170 """Computes the mean of squares of errors between labels and predictions. 171 172 For example, if `y_true` is [0., 0., 1., 1.] and `y_pred` is [1., 1., 1., 0.] 173 then the mean squared error value is 3/4 (0.75). 174 175 Usage: 176 177 ```python 178 mse = tf.keras.losses.MeanSquaredError() 179 loss = mse([0., 0., 1., 1.], [1., 1., 1., 0.]) 180 print('Loss: ', loss.numpy()) # Loss: 0.75 181 ``` 182 183 Usage with tf.keras API: 184 185 ```python 186 model = tf.keras.Model(inputs, outputs) 187 model.compile('sgd', loss=tf.keras.losses.MeanSquaredError()) 188 ``` 189 """ 190 191 def __init__(self, 192 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 193 name='mean_squared_error'): 194 super(MeanSquaredError, self).__init__( 195 mean_squared_error, name=name, reduction=reduction) 196 197 198@keras_export('keras.losses.MeanAbsoluteError') 199class MeanAbsoluteError(LossFunctionWrapper): 200 """Computes the mean of absolute difference between labels and predictions. 201 202 For example, if `y_true` is [0., 0., 1., 1.] and `y_pred` is [1., 1., 1., 0.] 203 then the mean absolute error value is 3/4 (0.75). 204 205 Usage: 206 207 ```python 208 mae = tf.keras.losses.MeanAbsoluteError() 209 loss = mae([0., 0., 1., 1.], [1., 1., 1., 0.]) 210 print('Loss: ', loss.numpy()) # Loss: 0.75 211 ``` 212 213 Usage with tf.keras API: 214 215 ```python 216 model = tf.keras.Model(inputs, outputs) 217 model.compile('sgd', loss=tf.keras.losses.MeanAbsoluteError()) 218 ``` 219 """ 220 221 def __init__(self, 222 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 223 name='mean_absolute_error'): 224 super(MeanAbsoluteError, self).__init__( 225 mean_absolute_error, name=name, reduction=reduction) 226 227 228@keras_export('keras.losses.MeanAbsolutePercentageError') 229class MeanAbsolutePercentageError(LossFunctionWrapper): 230 """Computes the mean absolute percentage error between `y_true` and `y_pred`. 231 232 For example, if `y_true` is [0., 0., 1., 1.] and `y_pred` is [1., 1., 1., 0.] 233 then the mean absolute percentage error value is 5e+08. 234 235 Usage: 236 237 ```python 238 mape = tf.keras.losses.MeanAbsolutePercentageError() 239 loss = mape([0., 0., 1., 1.], [1., 1., 1., 0.]) 240 print('Loss: ', loss.numpy()) # Loss: 5e+08 241 ``` 242 243 Usage with tf.keras API: 244 245 ```python 246 model = tf.keras.Model(inputs, outputs) 247 model.compile('sgd', loss=tf.keras.losses.MeanAbsolutePercentageError()) 248 ``` 249 """ 250 251 def __init__(self, 252 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 253 name='mean_absolute_percentage_error'): 254 super(MeanAbsolutePercentageError, self).__init__( 255 mean_absolute_percentage_error, name=name, reduction=reduction) 256 257 258@keras_export('keras.losses.MeanSquaredLogarithmicError') 259class MeanSquaredLogarithmicError(LossFunctionWrapper): 260 """Computes the mean squared logarithmic error between `y_true` and `y_pred`. 261 262 For example, if `y_true` is [0., 0., 1., 1.] and `y_pred` is [1., 1., 1., 0.] 263 then the mean squared logarithmic error value is 0.36034. 264 265 Usage: 266 267 ```python 268 msle = tf.keras.losses.MeanSquaredLogarithmicError() 269 loss = msle([0., 0., 1., 1.], [1., 1., 1., 0.]) 270 print('Loss: ', loss.numpy()) # Loss: 0.36034 271 ``` 272 273 Usage with tf.keras API: 274 275 ```python 276 model = tf.keras.Model(inputs, outputs) 277 model.compile('sgd', loss=tf.keras.losses.MeanSquaredLogarithmicError()) 278 ``` 279 """ 280 281 def __init__(self, 282 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 283 name='mean_squared_logarithmic_error'): 284 super(MeanSquaredLogarithmicError, self).__init__( 285 mean_squared_logarithmic_error, name=name, reduction=reduction) 286 287 288@keras_export('keras.losses.BinaryCrossentropy') 289class BinaryCrossentropy(LossFunctionWrapper): 290 """Computes the crossentropy loss between the labels and predictions. 291 292 Use this crossentropy loss function when there are only two label classes 293 (assumed to be 0 and 1). There should be a single floating point value per 294 feature. 295 296 In the snippet below, there is a single floating pointing value per example, 297 and the shape of both `y_pred` and `y_true` are `[batch_size]`. 298 299 Usage: 300 301 ```python 302 bce = tf.keras.losses.BinaryCrossentropy() 303 loss = bce([0., 0., 1., 1.], [1., 1., 1., 0.]) 304 print('Loss: ', loss.numpy()) # Loss: 12.007 305 ``` 306 307 Usage with tf.keras API: 308 309 ```python 310 model = tf.keras.Model(inputs, outputs) 311 model.compile('sgd', loss=tf.keras.losses.BinaryCrossentropy()) 312 ``` 313 314 Args: 315 from_logits: Whether `y_pred` is expected to be a logits tensor. By default, 316 we assume that `y_pred` encodes a probability distribution. 317 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. 318 reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. 319 Default value is `SUM_OVER_BATCH_SIZE`. 320 name: Optional name for the op. 321 """ 322 323 def __init__(self, 324 from_logits=False, 325 label_smoothing=0, 326 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 327 name='binary_crossentropy'): 328 super(BinaryCrossentropy, self).__init__( 329 binary_crossentropy, 330 name=name, 331 reduction=reduction, 332 from_logits=from_logits, 333 label_smoothing=label_smoothing) 334 self.from_logits = from_logits 335 336 337@keras_export('keras.losses.CategoricalCrossentropy') 338class CategoricalCrossentropy(LossFunctionWrapper): 339 """Computes the crossentropy loss between the labels and predictions. 340 341 Use this crossentropy loss function when there are two or more label classes. 342 We expect labels to be provided in a `one_hot` representation. If you want to 343 provide labels as integers, please use `SparseCategoricalCrossentropy` loss. 344 There should be `# classes` floating point values per feature. 345 346 In the snippet below, there is `# classes` floating pointing values per 347 example. The shape of both `y_pred` and `y_true` are 348 `[batch_size, num_classes]`. 349 350 Usage: 351 352 ```python 353 cce = tf.keras.losses.CategoricalCrossentropy() 354 loss = cce( 355 [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]], 356 [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]]) 357 print('Loss: ', loss.numpy()) # Loss: 0.3239 358 ``` 359 360 Usage with tf.keras API: 361 362 ```python 363 model = tf.keras.Model(inputs, outputs) 364 model.compile('sgd', loss=tf.keras.losses.CategoricalCrossentropy()) 365 ``` 366 367 Args: 368 from_logits: Whether `y_pred` is expected to be a logits tensor. By default, 369 we assume that `y_pred` encodes a probability distribution. 370 label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, 371 meaning the confidence on label values are relaxed. e.g. 372 `label_smoothing=0.2` means that we will use a value of `0.1` for label 373 `0` and `0.9` for label `1`" 374 reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. 375 Default value is `SUM_OVER_BATCH_SIZE`. 376 name: Optional name for the op. 377 """ 378 379 def __init__(self, 380 from_logits=False, 381 label_smoothing=0, 382 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 383 name='categorical_crossentropy'): 384 super(CategoricalCrossentropy, self).__init__( 385 categorical_crossentropy, 386 name=name, 387 reduction=reduction, 388 from_logits=from_logits, 389 label_smoothing=label_smoothing) 390 391 392@keras_export('keras.losses.SparseCategoricalCrossentropy') 393class SparseCategoricalCrossentropy(LossFunctionWrapper): 394 """Computes the crossentropy loss between the labels and predictions. 395 396 Use this crossentropy loss function when there are two or more label classes. 397 We expect labels to be provided as integers. If you want to provide labels 398 using `one-hot` representation, please use `CategoricalCrossentropy` loss. 399 There should be `# classes` floating point values per feature for `y_pred` 400 and a single floating point value per feature for `y_true`. 401 402 In the snippet below, there is a single floating point value per example for 403 `y_true` and `# classes` floating pointing values per example for `y_pred`. 404 The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is 405 `[batch_size, num_classes]`. 406 407 Usage: 408 409 ```python 410 cce = tf.keras.losses.SparseCategoricalCrossentropy() 411 loss = cce( 412 [0, 1, 2], 413 [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]]) 414 print('Loss: ', loss.numpy()) # Loss: 0.3239 415 ``` 416 417 Usage with tf.keras API: 418 419 ```python 420 model = tf.keras.Model(inputs, outputs) 421 model.compile('sgd', loss=tf.keras.losses.SparseCategoricalCrossentropy()) 422 ```` 423 424 Args: 425 from_logits: Whether `y_pred` is expected to be a logits tensor. By default, 426 we assume that `y_pred` encodes a probability distribution. 427 reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. 428 Default value is `SUM_OVER_BATCH_SIZE`. 429 name: Optional name for the op. 430 """ 431 432 def __init__(self, 433 from_logits=False, 434 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 435 name=None): 436 super(SparseCategoricalCrossentropy, self).__init__( 437 sparse_categorical_crossentropy, 438 name=name, 439 reduction=reduction, 440 from_logits=from_logits) 441 442 443@keras_export('keras.losses.Hinge') 444class Hinge(LossFunctionWrapper): 445 """Computes the hinge loss between `y_true` and `y_pred`. 446 447 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are 448 provided we will convert them to -1 or 1. 449 450 Usage: 451 452 ```python 453 h = tf.keras.losses.Hinge() 454 loss = h([-1., 1., 1.], [0.6, -0.7, -0.5]) 455 456 # loss = max(0, 1 - y_true * y_pred) = [1.6 + 1.7 + 1.5] / 3 457 458 print('Loss: ', loss.numpy()) # Loss: 1.6 459 ``` 460 461 Usage with tf.keras API: 462 463 ```python 464 model = tf.keras.Model(inputs, outputs) 465 model.compile('sgd', loss=tf.keras.losses.Hinge()) 466 ``` 467 """ 468 469 def __init__(self, 470 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 471 name=None): 472 super(Hinge, self).__init__(hinge, name=name, reduction=reduction) 473 474 475@keras_export('keras.losses.SquaredHinge') 476class SquaredHinge(LossFunctionWrapper): 477 """Computes the squared hinge loss between `y_true` and `y_pred`. 478 479 `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are 480 provided we will convert them to -1 or 1. 481 482 Usage: 483 484 ```python 485 sh = tf.keras.losses.SquaredHinge() 486 loss = sh([-1., 1., 1.], [0.6, -0.7, -0.5]) 487 488 # loss = (max(0, 1 - y_true * y_pred))^2 = [1.6^2 + 1.7^2 + 1.5^2] / 3 489 490 print('Loss: ', loss.numpy()) # Loss: 2.566666 491 ``` 492 493 Usage with tf.keras API: 494 495 ```python 496 model = tf.keras.Model(inputs, outputs) 497 model.compile('sgd', loss=tf.keras.losses.SquaredHinge()) 498 ``` 499 """ 500 501 def __init__(self, 502 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 503 name='squared_hinge'): 504 super(SquaredHinge, self).__init__( 505 squared_hinge, name=name, reduction=reduction) 506 507 508@keras_export('keras.losses.CategoricalHinge') 509class CategoricalHinge(LossFunctionWrapper): 510 """Computes the categorical hinge loss between `y_true` and `y_pred`. 511 512 Usage: 513 514 ```python 515 ch = tf.keras.losses.CategoricalHinge() 516 loss = ch([0., 1., 1.], [1., 0., 1.]) 517 print('Loss: ', loss.numpy()) # Loss: 1.0 518 ``` 519 520 Usage with tf.keras API: 521 522 ```python 523 model = tf.keras.Model(inputs, outputs) 524 model.compile('sgd', loss=tf.keras.losses.CategoricalHinge()) 525 ``` 526 """ 527 528 def __init__(self, 529 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 530 name='categorical_hinge'): 531 super(CategoricalHinge, self).__init__( 532 categorical_hinge, name=name, reduction=reduction) 533 534 535@keras_export('keras.losses.Poisson') 536class Poisson(LossFunctionWrapper): 537 """Computes the Poisson loss between `y_true` and `y_pred`. 538 539 `loss = y_pred - y_true * log(y_pred)` 540 541 Usage: 542 543 ```python 544 p = tf.keras.losses.Poisson() 545 loss = p([1, 9, 2], [4, 8, 12]) 546 print('Loss: ', loss.numpy()) # Loss: -4.63 547 ``` 548 549 Usage with tf.keras API: 550 551 ```python 552 model = tf.keras.Model(inputs, outputs) 553 model.compile('sgd', loss=tf.keras.losses.Poisson()) 554 ``` 555 """ 556 557 def __init__(self, 558 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 559 name='poisson'): 560 super(Poisson, self).__init__(poisson, name=name, reduction=reduction) 561 562 563@keras_export('keras.losses.LogCosh') 564class LogCosh(LossFunctionWrapper): 565 """Computes the logarithm of the hyperbolic cosine of the prediction error. 566 567 `logcosh = log((exp(x) + exp(-x))/2)`, where x is the error (y_pred - y_true) 568 569 Usage: 570 571 ```python 572 l = tf.keras.losses.LogCosh() 573 loss = l([0., 1., 1.], [1., 0., 1.]) 574 print('Loss: ', loss.numpy()) # Loss: 0.289 575 ``` 576 577 Usage with tf.keras API: 578 579 ```python 580 model = tf.keras.Model(inputs, outputs) 581 model.compile('sgd', loss=tf.keras.losses.LogCosh()) 582 ``` 583 """ 584 585 def __init__(self, 586 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 587 name='logcosh'): 588 super(LogCosh, self).__init__(logcosh, name=name, reduction=reduction) 589 590 591@keras_export('keras.losses.KLDivergence') 592class KLDivergence(LossFunctionWrapper): 593 """Computes Kullback Leibler divergence loss between `y_true` and `y_pred`. 594 595 `loss = y_true * log(y_true / y_pred)` 596 597 Usage: 598 599 ```python 600 k = tf.keras.losses.KLDivergence() 601 loss = k([.4, .9, .2], [.5, .8, .12]) 602 print('Loss: ', loss.numpy()) # Loss: -0.043 603 ``` 604 605 Usage with tf.keras API: 606 607 ```python 608 model = tf.keras.Model(inputs, outputs) 609 model.compile('sgd', loss=tf.keras.losses.KLDivergence()) 610 ``` 611 """ 612 613 def __init__(self, 614 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 615 name='kullback_leibler_divergence'): 616 super(KLDivergence, self).__init__( 617 kullback_leibler_divergence, name=name, reduction=reduction) 618 619 620@keras_export('keras.losses.Huber') 621class Huber(LossFunctionWrapper): 622 """Computes the Huber loss between `y_true` and `y_pred`. 623 624 For each value x in `error=y_true-y_pred`, the following is calculated: 625 626 ``` 627 0.5 * x^2 if |x| <= d 628 0.5 * d^2 + d * (|x| - d) if |x| > d 629 ``` 630 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss 631 632 Usage: 633 634 ```python 635 l = tf.keras.losses.Huber() 636 loss = l([0., 1., 1.], [1., 0., 1.]) 637 print('Loss: ', loss.numpy()) # Loss: 0.333 638 ``` 639 640 Usage with tf.keras API: 641 642 ```python 643 model = tf.keras.Model(inputs, outputs) 644 model.compile('sgd', loss=tf.keras.losses.Huber()) 645 ``` 646 647 Args: 648 delta: A float, the point where the Huber loss function changes from a 649 quadratic to linear. 650 reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. 651 Default value is `SUM_OVER_BATCH_SIZE`. 652 name: Optional name for the op. 653 """ 654 655 def __init__(self, 656 delta=1.0, 657 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 658 name='huber_loss'): 659 super(Huber, self).__init__( 660 huber_loss, name=name, reduction=reduction, delta=delta) 661 662 663@keras_export('keras.metrics.mean_squared_error', 664 'keras.metrics.mse', 665 'keras.metrics.MSE', 666 'keras.losses.mean_squared_error', 667 'keras.losses.mse', 668 'keras.losses.MSE') 669def mean_squared_error(y_true, y_pred): 670 y_pred = ops.convert_to_tensor(y_pred) 671 y_true = math_ops.cast(y_true, y_pred.dtype) 672 return K.mean(math_ops.squared_difference(y_pred, y_true), axis=-1) 673 674 675@keras_export('keras.metrics.mean_absolute_error', 676 'keras.metrics.mae', 677 'keras.metrics.MAE', 678 'keras.losses.mean_absolute_error', 679 'keras.losses.mae', 680 'keras.losses.MAE') 681def mean_absolute_error(y_true, y_pred): 682 y_pred = ops.convert_to_tensor(y_pred) 683 y_true = math_ops.cast(y_true, y_pred.dtype) 684 return K.mean(math_ops.abs(y_pred - y_true), axis=-1) 685 686 687@keras_export('keras.metrics.mean_absolute_percentage_error', 688 'keras.metrics.mape', 689 'keras.metrics.MAPE', 690 'keras.losses.mean_absolute_percentage_error', 691 'keras.losses.mape', 692 'keras.losses.MAPE') 693def mean_absolute_percentage_error(y_true, y_pred): # pylint: disable=missing-docstring 694 y_pred = ops.convert_to_tensor(y_pred) 695 y_true = math_ops.cast(y_true, y_pred.dtype) 696 diff = math_ops.abs( 697 (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None)) 698 return 100. * K.mean(diff, axis=-1) 699 700 701@keras_export('keras.metrics.mean_squared_logarithmic_error', 702 'keras.metrics.msle', 703 'keras.metrics.MSLE', 704 'keras.losses.mean_squared_logarithmic_error', 705 'keras.losses.msle', 706 'keras.losses.MSLE') 707def mean_squared_logarithmic_error(y_true, y_pred): # pylint: disable=missing-docstring 708 y_pred = ops.convert_to_tensor(y_pred) 709 y_true = math_ops.cast(y_true, y_pred.dtype) 710 first_log = math_ops.log(K.clip(y_pred, K.epsilon(), None) + 1.) 711 second_log = math_ops.log(K.clip(y_true, K.epsilon(), None) + 1.) 712 return K.mean(math_ops.squared_difference(first_log, second_log), axis=-1) 713 714 715def _maybe_convert_labels(y_true): 716 """Converts binary labels into -1/1.""" 717 are_zeros = math_ops.equal(y_true, 0) 718 are_ones = math_ops.equal(y_true, 1) 719 is_binary = math_ops.reduce_all(math_ops.logical_or(are_zeros, are_ones)) 720 721 def _convert_binary_labels(): 722 # Convert the binary labels to -1 or 1. 723 return 2. * y_true - 1. 724 725 updated_y_true = smart_cond.smart_cond(is_binary, 726 _convert_binary_labels, lambda: y_true) 727 return updated_y_true 728 729 730@keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge') 731def squared_hinge(y_true, y_pred): 732 """Computes the squared hinge loss between `y_true` and `y_pred`. 733 734 Args: 735 y_true: The ground truth values. `y_true` values are expected to be -1 or 1. 736 If binary (0 or 1) labels are provided we will convert them to -1 or 1. 737 y_pred: The predicted values. 738 739 Returns: 740 Tensor with one scalar loss entry per sample. 741 """ 742 y_pred = ops.convert_to_tensor(y_pred) 743 y_true = math_ops.cast(y_true, y_pred.dtype) 744 y_true = _maybe_convert_labels(y_true) 745 return K.mean( 746 math_ops.square(math_ops.maximum(1. - y_true * y_pred, 0.)), axis=-1) 747 748 749@keras_export('keras.metrics.hinge', 'keras.losses.hinge') 750def hinge(y_true, y_pred): 751 """Computes the hinge loss between `y_true` and `y_pred`. 752 753 Args: 754 y_true: The ground truth values. `y_true` values are expected to be -1 or 1. 755 If binary (0 or 1) labels are provided we will convert them to -1 or 1. 756 y_pred: The predicted values. 757 758 Returns: 759 Tensor with one scalar loss entry per sample. 760 """ 761 y_pred = ops.convert_to_tensor(y_pred) 762 y_true = math_ops.cast(y_true, y_pred.dtype) 763 y_true = _maybe_convert_labels(y_true) 764 return K.mean(math_ops.maximum(1. - y_true * y_pred, 0.), axis=-1) 765 766 767@keras_export('keras.losses.categorical_hinge') 768def categorical_hinge(y_true, y_pred): 769 y_pred = ops.convert_to_tensor(y_pred) 770 y_true = math_ops.cast(y_true, y_pred.dtype) 771 pos = math_ops.reduce_sum(y_true * y_pred, axis=-1) 772 neg = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1) 773 return math_ops.maximum(0., neg - pos + 1.) 774 775 776def huber_loss(y_true, y_pred, delta=1.0): 777 """Computes Huber loss value. 778 779 For each value x in `error=y_true-y_pred`, the following is calculated: 780 781 ``` 782 0.5 * x^2 if |x| <= d 783 0.5 * d^2 + d * (|x| - d) if |x| > d 784 ``` 785 where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss 786 787 Args: 788 y_true: tensor of true targets. 789 y_pred: tensor of predicted targets. 790 delta: A float, the point where the Huber loss function changes from a 791 quadratic to linear. 792 793 Returns: 794 Tensor with one scalar loss entry per sample. 795 """ 796 y_pred = math_ops.cast(y_pred, dtype=K.floatx()) 797 y_true = math_ops.cast(y_true, dtype=K.floatx()) 798 error = math_ops.subtract(y_pred, y_true) 799 abs_error = math_ops.abs(error) 800 quadratic = math_ops.minimum(abs_error, delta) 801 linear = math_ops.subtract(abs_error, quadratic) 802 return math_ops.add( 803 math_ops.multiply( 804 ops.convert_to_tensor(0.5, dtype=quadratic.dtype), 805 math_ops.multiply(quadratic, quadratic)), 806 math_ops.multiply(delta, linear)) 807 808 809@keras_export('keras.losses.logcosh') 810def logcosh(y_true, y_pred): 811 """Logarithm of the hyperbolic cosine of the prediction error. 812 813 `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and 814 to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly 815 like the mean squared error, but will not be so strongly affected by the 816 occasional wildly incorrect prediction. 817 818 Arguments: 819 y_true: tensor of true targets. 820 y_pred: tensor of predicted targets. 821 822 Returns: 823 Tensor with one scalar loss entry per sample. 824 """ 825 y_pred = ops.convert_to_tensor(y_pred) 826 y_true = math_ops.cast(y_true, y_pred.dtype) 827 828 def _logcosh(x): 829 return x + nn.softplus(-2. * x) - math_ops.log(2.) 830 831 return K.mean(_logcosh(y_pred - y_true), axis=-1) 832 833 834@keras_export('keras.metrics.categorical_crossentropy', 835 'keras.losses.categorical_crossentropy') 836def categorical_crossentropy(y_true, 837 y_pred, 838 from_logits=False, 839 label_smoothing=0): 840 """Computes the categorical crossentropy loss. 841 842 Args: 843 y_true: tensor of true targets. 844 y_pred: tensor of predicted targets. 845 from_logits: Whether `y_pred` is expected to be a logits tensor. By default, 846 we assume that `y_pred` encodes a probability distribution. 847 label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. 848 849 Returns: 850 Categorical crossentropy loss value. 851 """ 852 y_pred = ops.convert_to_tensor(y_pred) 853 y_true = math_ops.cast(y_true, y_pred.dtype) 854 label_smoothing = ops.convert_to_tensor(label_smoothing, dtype=K.floatx()) 855 856 def _smooth_labels(): 857 num_classes = math_ops.cast(array_ops.shape(y_true)[1], y_pred.dtype) 858 return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes) 859 860 y_true = smart_cond.smart_cond(label_smoothing, 861 _smooth_labels, lambda: y_true) 862 return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits) 863 864 865@keras_export('keras.metrics.sparse_categorical_crossentropy', 866 'keras.losses.sparse_categorical_crossentropy') 867def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1): 868 return K.sparse_categorical_crossentropy( 869 y_true, y_pred, from_logits=from_logits, axis=axis) 870 871 872@keras_export('keras.metrics.binary_crossentropy', 873 'keras.losses.binary_crossentropy') 874def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0): # pylint: disable=missing-docstring 875 y_pred = ops.convert_to_tensor(y_pred) 876 y_true = math_ops.cast(y_true, y_pred.dtype) 877 label_smoothing = ops.convert_to_tensor(label_smoothing, dtype=K.floatx()) 878 879 def _smooth_labels(): 880 return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing 881 882 y_true = smart_cond.smart_cond(label_smoothing, 883 _smooth_labels, lambda: y_true) 884 return K.mean( 885 K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1) 886 887 888@keras_export('keras.metrics.kullback_leibler_divergence', 889 'keras.metrics.kld', 890 'keras.metrics.KLD', 891 'keras.losses.kullback_leibler_divergence', 892 'keras.losses.kld', 893 'keras.losses.KLD') 894def kullback_leibler_divergence(y_true, y_pred): # pylint: disable=missing-docstring 895 y_pred = ops.convert_to_tensor(y_pred) 896 y_true = math_ops.cast(y_true, y_pred.dtype) 897 y_true = K.clip(y_true, K.epsilon(), 1) 898 y_pred = K.clip(y_pred, K.epsilon(), 1) 899 return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1) 900 901 902@keras_export('keras.metrics.poisson', 'keras.losses.poisson') 903def poisson(y_true, y_pred): 904 y_pred = ops.convert_to_tensor(y_pred) 905 y_true = math_ops.cast(y_true, y_pred.dtype) 906 return K.mean(y_pred - y_true * math_ops.log(y_pred + K.epsilon()), axis=-1) 907 908 909# Retaining the legacy namespaces: 'cosine_proximity' and 'cosine'. 910# TODO(psv): Change name of this function to `cosine_similarity` after fixing 911# estimator test. 912@keras_export( 913 'keras.losses.cosine_similarity', 914 v1=[ 915 'keras.metrics.cosine_proximity', 916 'keras.metrics.cosine', 917 'keras.losses.cosine_proximity', 918 'keras.losses.cosine', 919 'keras.losses.cosine_similarity', 920 ]) 921def cosine_proximity(y_true, y_pred, axis=-1): 922 """Computes the cosine similarity between labels and predictions.""" 923 y_true = nn.l2_normalize(y_true, axis=axis) 924 y_pred = nn.l2_normalize(y_pred, axis=axis) 925 return math_ops.reduce_sum(y_true * y_pred, axis=axis) 926 927 928@keras_export('keras.losses.CosineSimilarity') 929class CosineSimilarity(LossFunctionWrapper): 930 """Computes the cosine similarity between `y_true` and `y_pred`. 931 932 Usage: 933 934 ```python 935 cosine_loss = tf.keras.losses.CosineSimilarity(axis=1) 936 loss = cosine_loss([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]]) 937 # l2_norm(y_true) = [[0., 1.], [1./1.414], 1./1.414]]] 938 # l2_norm(y_pred) = [[1., 0.], [1./1.414], 1./1.414]]] 939 # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]] 940 # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1)) 941 = ((0. + 0.) + (0.5 + 0.5)) / 2 942 943 print('Loss: ', loss.numpy()) # Loss: 0.5 944 ``` 945 946 Usage with tf.keras API: 947 948 ```python 949 model = tf.keras.Model(inputs, outputs) 950 model.compile('sgd', loss=tf.keras.losses.CosineSimilarity(axis=1)) 951 ``` 952 953 Args: 954 axis: (Optional) Defaults to -1. The dimension along which the cosine 955 similarity is computed. 956 reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. 957 Default value is `SUM_OVER_BATCH_SIZE`. 958 name: Optional name for the op. 959 """ 960 961 def __init__(self, 962 axis=-1, 963 reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, 964 name='cosine_similarity'): 965 super(CosineSimilarity, self).__init__( 966 cosine_similarity, reduction=reduction, name=name, axis=axis) 967 968 969# Aliases. 970 971mse = MSE = mean_squared_error 972mae = MAE = mean_absolute_error 973mape = MAPE = mean_absolute_percentage_error 974msle = MSLE = mean_squared_logarithmic_error 975kld = KLD = kullback_leibler_divergence 976cosine_similarity = cosine_proximity 977 978 979def is_categorical_crossentropy(loss): 980 result = ((isinstance(loss, CategoricalCrossentropy) or 981 (isinstance(loss, LossFunctionWrapper) and 982 loss.fn == categorical_crossentropy) or 983 (hasattr(loss, '__name__') and 984 loss.__name__ == 'categorical_crossentropy') or 985 (loss == 'categorical_crossentropy'))) 986 return result 987 988 989@keras_export('keras.losses.serialize') 990def serialize(loss): 991 return serialize_keras_object(loss) 992 993 994@keras_export('keras.losses.deserialize') 995def deserialize(name, custom_objects=None): 996 return deserialize_keras_object( 997 name, 998 module_objects=globals(), 999 custom_objects=custom_objects, 1000 printable_module_name='loss function') 1001 1002 1003@keras_export('keras.losses.get') 1004def get(identifier): 1005 if identifier is None: 1006 return None 1007 if isinstance(identifier, six.string_types): 1008 identifier = str(identifier) 1009 return deserialize(identifier) 1010 if isinstance(identifier, dict): 1011 return deserialize(identifier) 1012 elif callable(identifier): 1013 return identifier 1014 else: 1015 raise ValueError('Could not interpret ' 1016 'loss function identifier:', identifier) 1017 1018 1019LABEL_DTYPES_FOR_LOSSES = { 1020 losses_impl.sparse_softmax_cross_entropy: 'int32', 1021 sparse_categorical_crossentropy: 'int32' 1022} 1023