• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Ceres Solver - A fast non-linear least squares minimizer
2 // Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
3 // http://code.google.com/p/ceres-solver/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 //
8 // * Redistributions of source code must retain the above copyright notice,
9 //   this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright notice,
11 //   this list of conditions and the following disclaimer in the documentation
12 //   and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors may be
14 //   used to endorse or promote products derived from this software without
15 //   specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 //
29 // Author: sameeragarwal@google.com (Sameer Agarwal)
30 //
31 // The LossFunction interface is the way users describe how residuals
32 // are converted to cost terms for the overall problem cost function.
33 // For the exact manner in which loss functions are converted to the
34 // overall cost for a problem, see problem.h.
35 //
36 // For least squares problem where there are no outliers and standard
37 // squared loss is expected, it is not necessary to create a loss
38 // function; instead passing a NULL to the problem when adding
39 // residuals implies a standard squared loss.
40 //
41 // For least squares problems where the minimization may encounter
42 // input terms that contain outliers, that is, completely bogus
43 // measurements, it is important to use a loss function that reduces
44 // their associated penalty.
45 //
46 // Consider a structure from motion problem. The unknowns are 3D
47 // points and camera parameters, and the measurements are image
48 // coordinates describing the expected reprojected position for a
49 // point in a camera. For example, we want to model the geometry of a
50 // street scene with fire hydrants and cars, observed by a moving
51 // camera with unknown parameters, and the only 3D points we care
52 // about are the pointy tippy-tops of the fire hydrants. Our magic
53 // image processing algorithm, which is responsible for producing the
54 // measurements that are input to Ceres, has found and matched all
55 // such tippy-tops in all image frames, except that in one of the
56 // frame it mistook a car's headlight for a hydrant. If we didn't do
57 // anything special (i.e. if we used a basic quadratic loss), the
58 // residual for the erroneous measurement will result in extreme error
59 // due to the quadratic nature of squared loss. This results in the
60 // entire solution getting pulled away from the optimimum to reduce
61 // the large error that would otherwise be attributed to the wrong
62 // measurement.
63 //
64 // Using a robust loss function, the cost for large residuals is
65 // reduced. In the example above, this leads to outlier terms getting
66 // downweighted so they do not overly influence the final solution.
67 //
68 // What cost function is best?
69 //
70 // In general, there isn't a principled way to select a robust loss
71 // function. The authors suggest starting with a non-robust cost, then
72 // only experimenting with robust loss functions if standard squared
73 // loss doesn't work.
74 
75 #ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
76 #define CERES_PUBLIC_LOSS_FUNCTION_H_
77 
78 #include <glog/logging.h>
79 #include "ceres/internal/macros.h"
80 #include "ceres/internal/scoped_ptr.h"
81 #include "ceres/types.h"
82 
83 namespace ceres {
84 
85 class LossFunction {
86  public:
~LossFunction()87   virtual ~LossFunction() {}
88 
89   // For a residual vector with squared 2-norm 'sq_norm', this method
90   // is required to fill in the value and derivatives of the loss
91   // function (rho in this example):
92   //
93   //   out[0] = rho(sq_norm),
94   //   out[1] = rho'(sq_norm),
95   //   out[2] = rho''(sq_norm),
96   //
97   // Here the convention is that the contribution of a term to the
98   // cost function is given by 1/2 rho(s),  where
99   //
100   //   s = ||residuals||^2.
101   //
102   // Calling the method with a negative value of 's' is an error and
103   // the implementations are not required to handle that case.
104   //
105   // Most sane choices of rho() satisfy:
106   //
107   //   rho(0) = 0,
108   //   rho'(0) = 1,
109   //   rho'(s) < 1 in outlier region,
110   //   rho''(s) < 0 in outlier region,
111   //
112   // so that they mimic the least squares cost for small residuals.
113   virtual void Evaluate(double sq_norm, double out[3]) const = 0;
114 };
115 
116 // Some common implementations follow below.
117 //
118 // Note: in the region of interest (i.e. s < 3) we have:
119 //   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
120 
121 
122 // This corresponds to no robustification.
123 //
124 //   rho(s) = s
125 //
126 // At s = 0: rho = [0, 1, 0].
127 //
128 // It is not normally necessary to use this, as passing NULL for the
129 // loss function when building the problem accomplishes the same
130 // thing.
131 class TrivialLoss : public LossFunction {
132  public:
133   virtual void Evaluate(double, double*) const;
134 };
135 
136 // Scaling
137 // -------
138 // Given one robustifier
139 //   s -> rho(s)
140 // one can change the length scale at which robustification takes
141 // place, by adding a scale factor 'a' as follows:
142 //
143 //   s -> a^2 rho(s / a^2).
144 //
145 // The first and second derivatives are:
146 //
147 //   s -> rho'(s / a^2),
148 //   s -> (1 / a^2) rho''(s / a^2),
149 //
150 // but the behaviour near s = 0 is the same as the original function,
151 // i.e.
152 //
153 //   rho(s) = s + higher order terms,
154 //   a^2 rho(s / a^2) = s + higher order terms.
155 //
156 // The scalar 'a' should be positive.
157 //
158 // The reason for the appearance of squaring is that 'a' is in the
159 // units of the residual vector norm whereas 's' is a squared
160 // norm. For applications it is more convenient to specify 'a' than
161 // its square. The commonly used robustifiers below are described in
162 // un-scaled format (a = 1) but their implementations work for any
163 // non-zero value of 'a'.
164 
165 // Huber.
166 //
167 //   rho(s) = s               for s <= 1,
168 //   rho(s) = 2 sqrt(s) - 1   for s >= 1.
169 //
170 // At s = 0: rho = [0, 1, 0].
171 //
172 // The scaling parameter 'a' corresponds to 'delta' on this page:
173 //   http://en.wikipedia.org/wiki/Huber_Loss_Function
174 class HuberLoss : public LossFunction {
175  public:
HuberLoss(double a)176   explicit HuberLoss(double a) : a_(a), b_(a * a) { }
177   virtual void Evaluate(double, double*) const;
178 
179  private:
180   const double a_;
181   // b = a^2.
182   const double b_;
183 };
184 
185 // Soft L1, similar to Huber but smooth.
186 //
187 //   rho(s) = 2 (sqrt(1 + s) - 1).
188 //
189 // At s = 0: rho = [0, 1, -1/2].
190 class SoftLOneLoss : public LossFunction {
191  public:
SoftLOneLoss(double a)192   explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
193   virtual void Evaluate(double, double*) const;
194 
195  private:
196   // b = a^2.
197   const double b_;
198   // c = 1 / a^2.
199   const double c_;
200 };
201 
202 // Inspired by the Cauchy distribution
203 //
204 //   rho(s) = log(1 + s).
205 //
206 // At s = 0: rho = [0, 1, -1].
207 class CauchyLoss : public LossFunction {
208  public:
CauchyLoss(double a)209   explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
210   virtual void Evaluate(double, double*) const;
211 
212  private:
213   // b = a^2.
214   const double b_;
215   // c = 1 / a^2.
216   const double c_;
217 };
218 
219 // Loss that is capped beyond a certain level using the arc-tangent function.
220 // The scaling parameter 'a' determines the level where falloff occurs.
221 // For costs much smaller than 'a', the loss function is linear and behaves like
222 // TrivialLoss, and for values much larger than 'a' the value asymptotically
223 // approaches the constant value of a * PI / 2.
224 //
225 //   rho(s) = a atan(s / a).
226 //
227 // At s = 0: rho = [0, 1, 0].
228 class ArctanLoss : public LossFunction {
229  public:
ArctanLoss(double a)230   explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
231   virtual void Evaluate(double, double*) const;
232 
233  private:
234   const double a_;
235   // b = 1 / a^2.
236   const double b_;
237 };
238 
239 // Loss function that maps to approximately zero cost in a range around the
240 // origin, and reverts to linear in error (quadratic in cost) beyond this range.
241 // The tolerance parameter 'a' sets the nominal point at which the
242 // transition occurs, and the transition size parameter 'b' sets the nominal
243 // distance over which most of the transition occurs. Both a and b must be
244 // greater than zero, and typically b will be set to a fraction of a.
245 // The slope rho'[s] varies smoothly from about 0 at s <= a - b to
246 // about 1 at s >= a + b.
247 //
248 // The term is computed as:
249 //
250 //   rho(s) = b log(1 + exp((s - a) / b)) - c0.
251 //
252 // where c0 is chosen so that rho(0) == 0
253 //
254 //   c0 = b log(1 + exp(-a / b)
255 //
256 // This has the following useful properties:
257 //
258 //   rho(s) == 0               for s = 0
259 //   rho'(s) ~= 0              for s << a - b
260 //   rho'(s) ~= 1              for s >> a + b
261 //   rho''(s) > 0              for all s
262 //
263 // In addition, all derivatives are continuous, and the curvature is
264 // concentrated in the range a - b to a + b.
265 //
266 // At s = 0: rho = [0, ~0, ~0].
267 class TolerantLoss : public LossFunction {
268  public:
269   explicit TolerantLoss(double a, double b);
270   virtual void Evaluate(double, double*) const;
271 
272  private:
273   const double a_, b_, c_;
274 };
275 
276 // Composition of two loss functions.  The error is the result of first
277 // evaluating g followed by f to yield the composition f(g(s)).
278 // The loss functions must not be NULL.
279 class ComposedLoss : public LossFunction {
280  public:
281   explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
282                         const LossFunction* g, Ownership ownership_g);
283   virtual ~ComposedLoss();
284   virtual void Evaluate(double, double*) const;
285 
286  private:
287   internal::scoped_ptr<const LossFunction> f_, g_;
288   const Ownership ownership_f_, ownership_g_;
289 };
290 
291 // The discussion above has to do with length scaling: it affects the space
292 // in which s is measured. Sometimes you want to simply scale the output
293 // value of the robustifier. For example, you might want to weight
294 // different error terms differently (e.g., weight pixel reprojection
295 // errors differently from terrain errors).
296 //
297 // If rho is the wrapped robustifier, then this simply outputs
298 // s -> a * rho(s)
299 //
300 // The first and second derivatives are, not surprisingly
301 // s -> a * rho'(s)
302 // s -> a * rho''(s)
303 //
304 // Since we treat the a NULL Loss function as the Identity loss
305 // function, rho = NULL is a valid input and will result in the input
306 // being scaled by a. This provides a simple way of implementing a
307 // scaled ResidualBlock.
308 class ScaledLoss : public LossFunction {
309  public:
310   // Constructs a ScaledLoss wrapping another loss function. Takes
311   // ownership of the wrapped loss function or not depending on the
312   // ownership parameter.
ScaledLoss(const LossFunction * rho,double a,Ownership ownership)313   ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
314       rho_(rho), a_(a), ownership_(ownership) { }
315 
~ScaledLoss()316   virtual ~ScaledLoss() {
317     if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
318       rho_.release();
319     }
320   }
321   virtual void Evaluate(double, double*) const;
322 
323  private:
324   internal::scoped_ptr<const LossFunction> rho_;
325   const double a_;
326   const Ownership ownership_;
327   CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
328 };
329 
330 // Sometimes after the optimization problem has been constructed, we
331 // wish to mutate the scale of the loss function. For example, when
332 // performing estimation from data which has substantial outliers,
333 // convergence can be improved by starting out with a large scale,
334 // optimizing the problem and then reducing the scale. This can have
335 // better convergence behaviour than just using a loss function with a
336 // small scale.
337 //
338 // This templated class allows the user to implement a loss function
339 // whose scale can be mutated after an optimization problem has been
340 // constructed.
341 //
342 // Example usage
343 //
344 //  Problem problem;
345 //
346 //  // Add parameter blocks
347 //
348 //  CostFunction* cost_function =
349 //    new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
350 //      new UW_Camera_Mapper(data->observations[2*i + 0],
351 //                           data->observations[2*i + 1]));
352 //
353 //  LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP);
354 //
355 //  problem.AddResidualBlock(cost_function, loss_function, parameters);
356 //
357 //  Solver::Options options;
358 //  scoped_ptr<Solver::Summary> summary1(Solve(problem, options));
359 //
360 //  loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
361 //
362 //  scoped_ptr<Solver::Summary> summary2(Solve(problem, options));
363 //
364 class LossFunctionWrapper : public LossFunction {
365  public:
LossFunctionWrapper(LossFunction * rho,Ownership ownership)366   LossFunctionWrapper(LossFunction* rho, Ownership ownership)
367       : rho_(rho), ownership_(ownership) {
368   }
369 
~LossFunctionWrapper()370   virtual ~LossFunctionWrapper() {
371     if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
372       rho_.release();
373     }
374   }
375 
Evaluate(double sq_norm,double out[3])376   virtual void Evaluate(double sq_norm, double out[3]) const {
377     CHECK_NOTNULL(rho_.get());
378     rho_->Evaluate(sq_norm, out);
379   }
380 
Reset(LossFunction * rho,Ownership ownership)381   void Reset(LossFunction* rho, Ownership ownership) {
382     if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
383       rho_.release();
384     }
385     rho_.reset(rho);
386     ownership_ = ownership;
387   }
388 
389  private:
390   internal::scoped_ptr<const LossFunction> rho_;
391   Ownership ownership_;
392   CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
393 };
394 
395 }  // namespace ceres
396 
397 #endif  // CERES_PUBLIC_LOSS_FUNCTION_H_
398