# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Losses for Gtflow Estimator and Batch Estimator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops.losses import losses


def per_example_squared_hinge_loss(labels, weights, predictions):
  """Squared hinge loss given labels, example weights and predictions."""
  loss = losses.hinge_loss(labels=labels, logits=predictions, weights=weights)
  return math_ops.square(loss), control_flow_ops.no_op()
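
# A usage note (an assumption about `losses.hinge_loss`, which this file does
# not spell out): with its default reduction, `losses.hinge_loss` applies
# `weights` and reduces to a scalar, so the first value returned here is a
# scalar squared hinge loss rather than a per-example (N, 1) tensor.
#
#   labels = ops.convert_to_tensor([[1.0], [0.0]])
#   weights = ops.convert_to_tensor([[1.0], [1.0]])
#   logits = ops.convert_to_tensor([[0.5], [-0.5]])
#   loss, _ = per_example_squared_hinge_loss(labels, weights, logits)
#   # hinge losses are [0.5, 0.5]; reduced to 0.5, then squared: 0.25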


def per_example_logistic_loss(labels, weights, predictions):
  """Logistic loss given labels, example weights and predictions.

  Args:
    labels: Rank 2 (N, 1) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights.
    predictions: Rank 2 (N, 1) tensor of per-example predictions.

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example logistic loss.
    update_op: An update operation to update the loss's internal state.
  """
  labels = math_ops.cast(labels, dtypes.float32)
  unweighted_loss = nn.sigmoid_cross_entropy_with_logits(
      labels=labels, logits=predictions)
  return unweighted_loss * weights, control_flow_ops.no_op()
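
# A minimal usage sketch (hypothetical, not part of the original module). In
# TF 1.x graph mode these calls just build ops, and the returned update_op is
# a no-op because this loss keeps no internal state.
#
#   labels = ops.convert_to_tensor([[1.0], [0.0]])   # (2, 1)
#   weights = ops.convert_to_tensor([[1.0], [2.0]])  # (2, 1)
#   logits = ops.convert_to_tensor([[2.0], [-1.5]])  # (2, 1)
#   loss, update_op = per_example_logistic_loss(labels, weights, logits)
#   # loss is a (2, 1) tensor: weights * sigmoid cross entropy of the logits.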

# MUST BE USED WITH HESSIAN REGULARIZATION.
# This loss can have a zero Hessian, so it must be used with l2 or
# min_node_weight regularization.
# An example config is
# learner_config.constraints.min_node_weight = 1 / num_examples_per_layer
# learner_config.regularization.l2 = 1.0 / num_examples_per_layer
# TODO(nponomareva): make it multidimensional so we can estimate several
# quantiles at once.
def per_example_quantile_regression_loss(labels, weights, predictions,
                                         quantile):
  """Smoothed loss for quantile regression.

  The standard quantile regression loss is quantile*(y-y') when y>y' and
  (quantile-1)*(y-y') otherwise, where y' is the prediction and y is the
  label. The implementation below is this loss, but squared in the region
  where the loss value is less than 1.

  Args:
    labels: Rank 2 (N, D) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights.
    predictions: Rank 2 (N, D) tensor of per-example predictions.
    quantile: The quantile to use.

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example quantile loss.
    update_op: An update operation to update the loss's internal state.
  """
  labels = math_ops.cast(labels, dtypes.float32)
  error = labels - predictions
  square_loss_right = array_ops.where(error * quantile < 1.0,
                                      math_ops.square(quantile * error),
                                      quantile * error)
  square_loss_left = array_ops.where(error * (quantile - 1) < 1,
                                     math_ops.square((quantile - 1) * error),
                                     (quantile - 1) * error)

  unweighted_loss = array_ops.where(error > 0, square_loss_right,
                                    square_loss_left)
  if weights is None:
    return unweighted_loss, control_flow_ops.no_op()
  else:
    return unweighted_loss * weights, control_flow_ops.no_op()
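
# A worked sketch (hypothetical values): with quantile = 0.9 and one example
# where y = 2.0 and y' = 1.0, error = 1.0 > 0, and since 0.9 * 1.0 < 1 the
# smoothed branch applies: loss = (0.9 * 1.0)^2 = 0.81. For a large error,
# say y - y' = 10.0, 0.9 * 10.0 >= 1, so the loss stays linear: 9.0.
#
#   labels = ops.convert_to_tensor([[2.0], [11.0]])
#   predictions = ops.convert_to_tensor([[1.0], [1.0]])
#   loss, _ = per_example_quantile_regression_loss(
#       labels, weights=None, predictions=predictions, quantile=0.9)
#   # loss evaluates to [[0.81], [9.0]]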

# This is the classical form of the maximum entropy loss, which is twice
# differentiable (sparse_softmax_cross_entropy, which is what we would
# otherwise use, is not twice differentiable).
def per_example_maxent_loss(labels, weights, logits, num_classes, eps=1e-15):
  """Maximum entropy loss for multiclass problems.

  Maximum entropy is a generalization of logistic loss for the case when more
  than 2 classes are present.

  Args:
    labels: Rank 2 (N, 1) or Rank 1 (N) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights.
    logits: Rank 2 (N, K) tensor of per-example predictions, where K is the
      number of classes.
    num_classes: number of classes in the classification task. Used to expand
      label indices into one-hot encodings.
    eps: tolerance, used as a minimum possible value.

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example maxent loss.
    update_op: An update operation to update the loss's internal state.
  """
  labels = math_ops.cast(labels, dtypes.int64)
  # If labels are of rank 1, make them rank 2.
  labels_shape = labels.get_shape()
  if len(labels_shape) != 2:
    labels = array_ops.expand_dims(labels, 1)
  # Labels are indices of classes, convert them to one-hot encodings.
  target_one_hot = array_ops.one_hot(indices=labels, depth=num_classes)
  labels = math_ops.reduce_sum(input_tensor=target_one_hot, axis=[1])
  labels = math_ops.cast(labels, dtypes.float32)

  # Calculate softmax probabilities for each class.
  unnormalized_probs = math_ops.exp(logits)
  normalizers = math_ops.reduce_sum(unnormalized_probs, 1, keepdims=True)
  softmax_predictions = math_ops.divide(unnormalized_probs,
                                        math_ops.add(normalizers, eps))

  # Pull out the probabilities for the real label.
  probs_for_real_class = math_ops.reduce_sum(labels * softmax_predictions, 1)

  # Add handling for values near 0 and 1.
  zeros = array_ops.zeros_like(probs_for_real_class, dtype=logits.dtype) + eps
  one_minus_eps = array_ops.ones_like(
      probs_for_real_class, dtype=logits.dtype) - eps

  # Take maximum(eps, pred).
  cond = (probs_for_real_class >= eps)
  probs_for_real_class = array_ops.where(cond, probs_for_real_class, zeros)

  # Take minimum(1-eps, pred).
  cond = (probs_for_real_class <= 1 - eps)
  probs_for_real_class = array_ops.where(cond, probs_for_real_class,
                                         one_minus_eps)

  unweighted_loss = array_ops.expand_dims(-math_ops.log(probs_for_real_class),
                                          1)
  if weights is None:
    return unweighted_loss, control_flow_ops.no_op()
  else:
    return unweighted_loss * weights, control_flow_ops.no_op()
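
# A minimal usage sketch (hypothetical): rank-1 integer labels are expanded
# to one-hot encodings internally, so for K = 3 classes:
#
#   labels = ops.convert_to_tensor([2, 0], dtype=dtypes.int64)  # (2,)
#   logits = ops.convert_to_tensor([[0.1, 0.2, 3.0],
#                                   [2.0, -1.0, -1.0]])         # (2, 3)
#   loss, _ = per_example_maxent_loss(
#       labels, weights=None, logits=logits, num_classes=3)
#   # loss has shape (2, 1): -log(softmax probability of the true class),
#   # with the probability clamped into [eps, 1 - eps] for stability.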


def per_example_squared_loss(labels, weights, predictions):
  """Squared loss given labels, example weights and predictions.

  Args:
    labels: Rank 2 (N, D) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights.
    predictions: Rank 2 (N, D) tensor of per-example predictions.

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example squared loss.
    update_op: An update operation to update the loss's internal state.
  """
  unweighted_loss = math_ops.reduce_sum(
      math_ops.squared_difference(predictions, labels), 1, keepdims=True)

  return unweighted_loss * weights, control_flow_ops.no_op()
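
# A worked sketch (hypothetical values): with D = 2, the squared differences
# are summed across the feature dimension before weighting:
#
#   labels = ops.convert_to_tensor([[1.0, 2.0]])       # (1, 2)
#   predictions = ops.convert_to_tensor([[0.0, 0.0]])  # (1, 2)
#   weights = ops.convert_to_tensor([[0.5]])           # (1, 1)
#   loss, _ = per_example_squared_loss(labels, weights, predictions)
#   # loss evaluates to [[0.5 * (1 + 4)]] = [[2.5]]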


def per_example_exp_loss(labels, weights, predictions, name=None, eps=0.1):
  """Trimmed exponential loss given labels, example weights and predictions.

  Note that this is only for binary classification.
  Where logistic loss tries to make sure that the classifier is certain of its
  predictions, exp loss says: "as long as it got it correct, even barely, I
  don't care". It can be used on noisy data, or when you don't care about
  getting the actual probabilities from the model, just the correct label.

  The loss returned is exp(-targets*modified_predictions), where
  modified_predictions are 1 if the sigmoid is >= 0.5+eps (i.e. we predict the
  positive class), -1 if the sigmoid is < 0.5-eps (i.e. we predict the
  negative class), and a*x+b on the interval (0.5-eps, 0.5+eps), where
  a = 1/eps and b = -1/(2*eps).

  Args:
    labels: Rank 2 (N, D) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights.
    predictions: Rank 2 (N, D) tensor of per-example predictions.
    name: A name for the operation (optional).
    eps: For the range (0.5-eps, 0.5+eps) we set the predictions to be a*x+b.

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example exp loss.
    update_op: An update operation to update the loss's internal state.
  """

  def exp_with_logits(name, eps, labels=None, logits=None):
    """Computes exponential loss given `logits`.

    The loss returned is exp(-targets*modified_predictions), where
    modified_predictions are 1 if the sigmoid is >= 0.5+eps (i.e. we predict
    the positive class), -1 if the sigmoid is < 0.5-eps (i.e. we predict the
    negative class), and a*x+b on the interval (0.5-eps, 0.5+eps), where
    a = 1/eps and b = -1/(2*eps).

    Args:
      name: A name for the operation (optional).
      eps: For the range (0.5-eps, 0.5+eps) we set the predictions to be a*x+b.
      labels: A `Tensor` of the same type and shape as `logits`.
      logits: A `Tensor` of type `float32` or `float64`.

    Returns:
      A `Tensor` of the same shape as `logits` with the componentwise
      exponential losses.

    Raises:
      ValueError: If `logits` and `labels` do not have the same shape.
    """
    with ops.name_scope(name, "exp_loss", [logits, labels]) as name:
      logits = ops.convert_to_tensor(logits, name="logits")
      labels = ops.convert_to_tensor(labels, name="labels")
      try:
        labels.get_shape().merge_with(logits.get_shape())
      except ValueError:
        raise ValueError("logits and labels must have the same shape (%s vs %s)"
                         % (logits.get_shape(), labels.get_shape()))

    # Default threshold to switch between classes.
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    ones = array_ops.ones_like(logits, dtype=logits.dtype)
    neg_ones = -array_ops.ones_like(logits, dtype=logits.dtype)

    # Convert labels to 1 and -1.
    cond_labels = (labels > zeros)
    labels_converted = array_ops.where(cond_labels, ones, neg_ones)

    # Convert predictions to values in [-1, 1].
    # The modified prediction is min(1, max(-1, a*x+b)),
    # where a = 1/eps and b = -1/(2*eps).

    a = 1.0 / eps
    b = -1.0 / 2 / eps
    probs = math_ops.sigmoid(logits)
    y = a * probs + b
    # Build max(-1, a*x+b).
    cond = (y < -1)
    max_res = array_ops.where(cond, neg_ones, y)
    # Build the min part.
    cond = (max_res > 1)
    min_res = array_ops.where(cond, ones, max_res)
    preds_converted = min_res
    return math_ops.exp(-preds_converted * labels_converted)

  labels = math_ops.cast(labels, dtypes.float32)
  unweighted_loss = exp_with_logits(
      name=name, eps=eps, labels=labels, logits=predictions)
  return unweighted_loss * weights, control_flow_ops.no_op()
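
# A worked sketch (hypothetical values) of the trimming, with eps = 0.1 so
# that a = 1 / eps = 10 and b = -1 / (2 * eps) = -5:
#   sigmoid(logit) >= 0.6 gives a modified prediction clipped to 1,
#   sigmoid(logit) <= 0.4 gives -1,
#   sigmoid(logit) = 0.55 gives 10 * 0.55 - 5 = 0.5.
# For a positive example with modified prediction 1, the loss is
# exp(-1 * 1) ~= 0.368 regardless of how large the logit is:
#
#   labels = ops.convert_to_tensor([[1.0], [1.0]])
#   weights = ops.convert_to_tensor([[1.0], [1.0]])
#   logits = ops.convert_to_tensor([[5.0], [50.0]])  # both sigmoids >= 0.6
#   loss, _ = per_example_exp_loss(labels, weights, logits, eps=0.1)
#   # both rows evaluate to exp(-1) ~= 0.368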


def per_example_full_exp_loss(labels, weights, predictions, name=None):
  """Full exponential loss given labels, example weights and predictions.

  Note that this is only for binary classification.
  The loss returned is exp(-targets*logits), where targets are converted to -1
  and 1.

  Args:
    labels: Rank 2 (N, D) tensor of per-example labels.
    weights: Rank 2 (N, 1) tensor of per-example weights.
    predictions: Rank 2 (N, D) tensor of per-example predictions.
    name: A name for the operation (optional).

  Returns:
    loss: A Rank 2 (N, 1) tensor of per-example exp loss.
    update_op: An update operation to update the loss's internal state.
  """

  def full_exp_with_logits(name, labels=None, logits=None):
    """Computes exponential loss given `logits`.

    Args:
      name: A name for the operation (optional).
      labels: A `Tensor` of the same type and shape as `logits`.
      logits: A `Tensor` of type `float32` or `float64`.

    Returns:
      A `Tensor` of the same shape as `logits` with the componentwise
      exponential losses.

    Raises:
      ValueError: If `logits` and `labels` do not have the same shape.
    """
    with ops.name_scope(name, "exp_loss", [logits, labels]) as name:
      logits = ops.convert_to_tensor(logits, name="logits")
      labels = ops.convert_to_tensor(labels, name="labels")
      try:
        labels.get_shape().merge_with(logits.get_shape())
      except ValueError:
        raise ValueError("logits and labels must have the same shape (%s vs %s)"
                         % (logits.get_shape(), labels.get_shape()))

    # Default threshold of 0 to switch between classes.
    zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
    ones = array_ops.ones_like(logits, dtype=logits.dtype)
    neg_ones = -array_ops.ones_like(logits, dtype=logits.dtype)

    # Convert labels to 1 and -1.
    cond_labels = (labels > zeros)
    labels_converted = array_ops.where(cond_labels, ones, neg_ones)

    return math_ops.exp(-1.0 * logits * labels_converted)

  labels = math_ops.cast(labels, dtypes.float32)
  unweighted_loss = full_exp_with_logits(
      name=name, labels=labels, logits=predictions)
  return unweighted_loss * weights, control_flow_ops.no_op()

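# A minimal usage sketch (hypothetical): labels in {0, 1} are converted to
# {-1, +1} internally, and the loss is exp(-label * logit) with no trimming,
# so it grows without bound for confident mistakes:
#
#   labels = ops.convert_to_tensor([[1.0], [0.0]])
#   weights = ops.convert_to_tensor([[1.0], [1.0]])
#   logits = ops.convert_to_tensor([[2.0], [2.0]])
#   loss, _ = per_example_full_exp_loss(labels, weights, logits)
#   # row 0: exp(-2.0) ~= 0.135; row 1: exp(2.0) ~= 7.389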