1# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Losses for Gtflow Estimator and Batch Estimator.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21from tensorflow.python.framework import dtypes 22from tensorflow.python.framework import ops 23from tensorflow.python.ops import array_ops 24from tensorflow.python.ops import control_flow_ops 25from tensorflow.python.ops import math_ops 26from tensorflow.python.ops import nn 27from tensorflow.python.ops.losses import losses 28 29 30def per_example_squared_hinge_loss(labels, weights, predictions): 31 loss = losses.hinge_loss(labels=labels, logits=predictions, weights=weights) 32 return math_ops.square(loss), control_flow_ops.no_op() 33 34 35def per_example_logistic_loss(labels, weights, predictions): 36 """Logistic loss given labels, example weights and predictions. 37 38 Args: 39 labels: Rank 2 (N, 1) tensor of per-example labels. 40 weights: Rank 2 (N, 1) tensor of per-example weights. 41 predictions: Rank 2 (N, 1) tensor of per-example predictions. 42 43 Returns: 44 loss: A Rank 2 (N, 1) tensor of per-example logistic loss. 45 update_op: An update operation to update the loss's internal state. 46 """ 47 labels = math_ops.cast(labels, dtypes.float32) 48 unweighted_loss = nn.sigmoid_cross_entropy_with_logits( 49 labels=labels, logits=predictions) 50 return unweighted_loss * weights, control_flow_ops.no_op() 51 52# MUST USE WITH HESSIAN REGULARIZATION, 53# This loss can have zero hessian, so it must be used with l2 or min_node_weight 54# regularization. 55# An example config is 56# learner_config.constraints.min_node_weight = 1 / num_examples_per_layer 57# learner_config.regularization.l2 = 1.0 / num_examples_per_layer 58# TODO(nponomareva): make it multidimensional so we can estimate several 59# quantiles at once. 60def per_example_quantile_regression_loss(labels, weights, predictions, 61 quantile): 62 """Smoothed loss for quantile regression. 63 64 The standard quantile regression loss is quantile*(y-y') when y>y' and 65 (quantile-1)*(y-y') otherwise, y' is a prediction, y is a label. The impl 66 below is this loss but squared in the region where the loss value < 1. 67 68 Args: 69 labels: Rank 2 (N, D) tensor of per-example labels. 70 weights: Rank 2 (N, 1) tensor of per-example weights. 71 predictions: Rank 2 (N, D) tensor of per-example predictions. 72 quantile: The quantile to use. 73 74 Returns: 75 loss: A Rank 2 (N, 1) tensor of per-example quantile loss. 76 update_op: An update operation to update the loss's internal state. 77 """ 78 labels = math_ops.cast(labels, dtypes.float32) 79 error = labels - predictions 80 square_loss_right = array_ops.where(error * quantile < 1.0, 81 math_ops.square(quantile * error), 82 quantile * error) 83 square_loss_left = array_ops.where(error * (quantile - 1) < 1, 84 math_ops.square((quantile - 1) * error), 85 (quantile - 1) * error) 86 87 unweighted_loss = array_ops.where(error > 0, square_loss_right, 88 square_loss_left) 89 if weights is None: 90 return unweighted_loss, control_flow_ops.no_op() 91 else: 92 return unweighted_loss * weights, control_flow_ops.no_op() 93 94# This is classical form of Maximum entropy loss, that is twice differentiable 95# (sparse_softmax_cross_entropy which is what we go for is not twice 96# differentiable). 97def per_example_maxent_loss(labels, weights, logits, num_classes, eps=1e-15): 98 """Maximum entropy loss for multiclass problems. 99 100 Maximum entropy is a generalization of logistic loss for the case when more 101 than 2 classes are present. 102 103 Args: 104 labels: Rank 2 (N, 1) or Rank 1 (N) tensor of per-example labels. 105 weights: Rank 2 (N, 1) tensor of per-example weights. 106 logits: Rank 2 (N, K) tensor of per-example predictions, K - num of 107 classes. 108 num_classes: number of classes in classification task. Used to expand label 109 indices into one-hot encodings. 110 eps: tolerance, used as a minimum possible value. 111 112 Returns: 113 loss: A Rank 2 (N, 1) tensor of per-example maxent loss 114 update_op: An update operation to update the loss's internal state. 115 """ 116 labels = math_ops.cast(labels, dtypes.int64) 117 # If labels are of rank 1, make them rank 2. 118 labels_shape = labels.get_shape() 119 if len(labels_shape) != 2: 120 labels = array_ops.expand_dims(labels, 1) 121 # Labels are indices of classes, convert them to one hot encodings. 122 target_one_hot = array_ops.one_hot(indices=labels, depth=num_classes) 123 labels = math_ops.reduce_sum(input_tensor=target_one_hot, axis=[1]) 124 labels = math_ops.cast(labels, dtypes.float32) 125 126 # Calculate softmax probabilities for each class. 127 unnormalized_probs = math_ops.exp(logits) 128 normalizers = math_ops.reduce_sum(unnormalized_probs, 1, keepdims=True) 129 softmax_predictions = math_ops.divide(unnormalized_probs, 130 math_ops.add(normalizers, eps)) 131 132 # Pull out the probabilities for real label. 133 probs_for_real_class = math_ops.reduce_sum(labels * softmax_predictions, 1) 134 135 # Add handling for values near 0 and 1. 136 zeros = array_ops.zeros_like(probs_for_real_class, dtype=logits.dtype) + eps 137 one_minus_eps = array_ops.ones_like( 138 probs_for_real_class, dtype=logits.dtype) - eps 139 140 # Take maximum(eps, pred) 141 cond = (probs_for_real_class >= eps) 142 probs_for_real_class = array_ops.where(cond, probs_for_real_class, zeros) 143 144 # Take minimum(1-eps, pred) 145 cond = (probs_for_real_class <= 1 - eps) 146 probs_for_real_class = array_ops.where(cond, probs_for_real_class, 147 one_minus_eps) 148 149 unweighted_loss = array_ops.expand_dims(-math_ops.log(probs_for_real_class), 150 1) 151 if weights is None: 152 return unweighted_loss, control_flow_ops.no_op() 153 else: 154 return unweighted_loss * weights, control_flow_ops.no_op() 155 156 157def per_example_squared_loss(labels, weights, predictions): 158 """Squared loss given labels, example weights and predictions. 159 160 Args: 161 labels: Rank 2 (N, D) tensor of per-example labels. 162 weights: Rank 2 (N, 1) tensor of per-example weights. 163 predictions: Rank 2 (N, D) tensor of per-example predictions. 164 165 Returns: 166 loss: A Rank 2 (N, 1) tensor of per-example squared loss. 167 update_op: An update operation to update the loss's internal state. 168 """ 169 unweighted_loss = math_ops.reduce_sum( 170 math_ops.squared_difference(predictions, labels), 1, keepdims=True) 171 172 return unweighted_loss * weights, control_flow_ops.no_op() 173 174 175def per_example_exp_loss(labels, weights, predictions, name=None, eps=0.1): 176 """Trimmed exponential loss given labels, example weights and predictions. 177 178 Note that this is only for binary classification. 179 If logistic loss tries to make sure that the classifier is certain of its 180 predictions, exp loss says: "as long as it got it correct, even barely, i 181 don't care". Can be used on noisy data, or when you don't care about getting 182 the actual probabilities from the model, just the correct label. 183 184 The loss returns is exp(-targets*modified_predictions), where 185 modified_predictions are 1 if sigmoid is >= 0.5+eps (eg we predict positive 186 class), -1 if sigmoid < 0.5-eps (e.g. we predict negative class) and ax+b in 187 the interval 0.5-eps, 0.5+eps, where a = 1/eps, b=1/(2eps). 188 189 Args: 190 labels: Rank 2 (N, D) tensor of per-example labels. 191 weights: Rank 2 (N, 1) tensor of per-example weights. 192 predictions: Rank 2 (N, D) tensor of per-example predictions. 193 name: A name for the operation (optional). 194 eps: For the range (0.5-eps, 0.5+eps) we set the predictions to be ax+b. 195 196 Returns: 197 loss: A Rank 2 (N, 1) tensor of per-example exp loss 198 update_op: An update operation to update the loss's internal state. 199 """ 200 201 def exp_with_logits(name, eps, labels=None, logits=None): 202 """Computes exponential loss given `logits`. 203 204 The loss returns is exp(-targets*modified_predictions), where 205 modified_predictions are 1 if sigmoid is >= 0.5+eps (eg we predict positive 206 class), -1 if sigmoid < 0.5-eps (e.g. we predict negative class) and ax+b in 207 the interval 0.5-eps, 0.5+eps, where a = 1/eps, b=1/(2eps). 208 209 Args: 210 name: A name for the operation (optional). 211 eps: For the range (0.5-eps, 0.5+eps) we set the predictions to be ax+b. 212 labels: A `Tensor` of the same type and shape as `logits`. 213 logits: A `Tensor` of type `float32` or `float64`. 214 215 Returns: 216 A `Tensor` of the same shape as `logits` with the componentwise 217 exponential losses. 218 219 Raises: 220 ValueError: If `logits` and `labels` do not have the same shape. 221 """ 222 with ops.name_scope(name, "exp_loss", [logits, labels]) as name: 223 logits = ops.convert_to_tensor(logits, name="logits") 224 labels = ops.convert_to_tensor(labels, name="labels") 225 try: 226 labels.get_shape().merge_with(logits.get_shape()) 227 except ValueError: 228 raise ValueError("logits and labels must have the same shape (%s vs %s)" 229 % (logits.get_shape(), labels.get_shape())) 230 231 # Default threshold to switch between classes 232 zeros = array_ops.zeros_like(logits, dtype=logits.dtype) 233 ones = array_ops.ones_like(logits, dtype=logits.dtype) 234 neg_ones = -array_ops.ones_like(logits, dtype=logits.dtype) 235 236 # Convert labels to 1 and -1 237 cond_labels = (labels > zeros) 238 labels_converted = array_ops.where(cond_labels, ones, neg_ones) 239 240 # Convert predictions to 1 and -1 241 # The loss we build is min(1, max(-1,ax+b)) 242 # where a=1/eps, b=-1/2eps. 243 244 a = 1.0 / eps 245 b = -1.0 / 2 / eps 246 probs = math_ops.sigmoid(logits) 247 y = a * probs + b 248 # Build max(-1, ax+b) 249 cond = (y < -1) 250 max_res = array_ops.where(cond, neg_ones, y) 251 # Build min part 252 cond = (max_res > 1) 253 min_res = array_ops.where(cond, ones, max_res) 254 preds_converted = min_res 255 return math_ops.exp(-preds_converted * labels_converted) 256 257 labels = math_ops.cast(labels, dtypes.float32) 258 unweighted_loss = exp_with_logits( 259 name=name, eps=eps, labels=labels, logits=predictions) 260 return unweighted_loss * weights, control_flow_ops.no_op() 261 262 263def per_example_full_exp_loss(labels, weights, predictions, name=None): 264 """Full exponential loss given labels, example weights and predictions. 265 266 Note that this is only for binary classification. 267 The loss returns is exp(-targets*logits), where targets are converted to -1 268 and 1. 269 270 Args: 271 labels: Rank 2 (N, D) tensor of per-example labels. 272 weights: Rank 2 (N, 1) tensor of per-example weights. 273 predictions: Rank 2 (N, D) tensor of per-example predictions. 274 name: A name for the operation (optional). 275 276 Returns: 277 loss: A Rank 2 (N, 1) tensor of per-example exp loss 278 update_op: An update operation to update the loss's internal state. 279 """ 280 281 def full_exp_with_logits(name, labels=None, logits=None): 282 """Computes exponential loss given `logits`. 283 284 Args: 285 name: A name for the operation (optional). 286 labels: A `Tensor` of the same type and shape as `logits`. 287 logits: A `Tensor` of type `float32` or `float64`. 288 289 Returns: 290 A `Tensor` of the same shape as `logits` with the componentwise 291 exponential losses. 292 293 Raises: 294 ValueError: If `logits` and `labels` do not have the same shape. 295 """ 296 with ops.name_scope(name, "exp_loss", [logits, labels]) as name: 297 logits = ops.convert_to_tensor(logits, name="logits") 298 labels = ops.convert_to_tensor(labels, name="labels") 299 try: 300 labels.get_shape().merge_with(logits.get_shape()) 301 except ValueError: 302 raise ValueError("logits and labels must have the same shape (%s vs %s)" 303 % (logits.get_shape(), labels.get_shape())) 304 305 # Default threshold of 0 to switch between classes 306 zeros = array_ops.zeros_like(logits, dtype=logits.dtype) 307 ones = array_ops.ones_like(logits, dtype=logits.dtype) 308 neg_ones = -array_ops.ones_like(logits, dtype=logits.dtype) 309 310 # Convert labels to 1 and -1 311 cond_labels = (labels > zeros) 312 labels_converted = array_ops.where(cond_labels, ones, neg_ones) 313 314 return math_ops.exp(-1.0 * logits * labels_converted) 315 316 labels = math_ops.cast(labels, dtypes.float32) 317 unweighted_loss = full_exp_with_logits( 318 name=name, labels=labels, logits=predictions) 319 return unweighted_loss * weights, control_flow_ops.no_op() 320