1 // Ceres Solver - A fast non-linear least squares minimizer
2 // Copyright 2012 Google Inc. All rights reserved.
3 // http://code.google.com/p/ceres-solver/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 //
8 // * Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright notice,
11 // this list of conditions and the following disclaimer in the documentation
12 // and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors may be
14 // used to endorse or promote products derived from this software without
15 // specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 //
29 // Author: sameeragarwal@google.com (Sameer Agarwal)
30
31 #include <iomanip>
32 #include <iostream> // NOLINT
33
34 #include "ceres/line_search.h"
35
36 #include "ceres/fpclassify.h"
37 #include "ceres/evaluator.h"
38 #include "ceres/internal/eigen.h"
39 #include "ceres/polynomial.h"
40 #include "ceres/stringprintf.h"
41 #include "glog/logging.h"
42
43 namespace ceres {
44 namespace internal {
45 namespace {
46 // Precision used for floating point values in error message output.
47 const int kErrorMessageNumericPrecision = 8;
48
ValueSample(const double x,const double value)49 FunctionSample ValueSample(const double x, const double value) {
50 FunctionSample sample;
51 sample.x = x;
52 sample.value = value;
53 sample.value_is_valid = true;
54 return sample;
55 };
56
ValueAndGradientSample(const double x,const double value,const double gradient)57 FunctionSample ValueAndGradientSample(const double x,
58 const double value,
59 const double gradient) {
60 FunctionSample sample;
61 sample.x = x;
62 sample.value = value;
63 sample.gradient = gradient;
64 sample.value_is_valid = true;
65 sample.gradient_is_valid = true;
66 return sample;
67 };
68
69 } // namespace
70
71
72 std::ostream& operator<<(std::ostream &os, const FunctionSample& sample);
73
74 // Convenience stream operator for pushing FunctionSamples into log messages.
operator <<(std::ostream & os,const FunctionSample & sample)75 std::ostream& operator<<(std::ostream &os, const FunctionSample& sample) {
76 os << sample.ToDebugString();
77 return os;
78 }
79
// Initializes options_ from the supplied options; performs no other work.
LineSearch::LineSearch(const LineSearch::Options& options)
    : options_(options) {
}
82
// Factory for the concrete line search implementations.
//
// For ceres::ARMIJO / ceres::WOLFE, returns a new ArmijoLineSearch /
// WolfeLineSearch constructed from options. For any other line_search_type,
// writes a description of the problem to *error and returns NULL. Note that
// error is dereferenced without a NULL check on that path.
LineSearch* LineSearch::Create(const LineSearchType line_search_type,
                               const LineSearch::Options& options,
                               string* error) {
  switch (line_search_type) {
    case ceres::ARMIJO:
      return new ArmijoLineSearch(options);
    case ceres::WOLFE:
      return new WolfeLineSearch(options);
    default:
      break;
  }

  // Unrecognised type: report it to the caller and hand back nothing.
  *error = string("Invalid line search algorithm type: ") +
      LineSearchTypeToString(line_search_type) +
      string(", unable to create line search.");
  return NULL;
}
102
LineSearchFunction(Evaluator * evaluator)103 LineSearchFunction::LineSearchFunction(Evaluator* evaluator)
104 : evaluator_(evaluator),
105 position_(evaluator->NumParameters()),
106 direction_(evaluator->NumEffectiveParameters()),
107 evaluation_point_(evaluator->NumParameters()),
108 scaled_direction_(evaluator->NumEffectiveParameters()),
109 gradient_(evaluator->NumEffectiveParameters()) {
110 }
111
Init(const Vector & position,const Vector & direction)112 void LineSearchFunction::Init(const Vector& position,
113 const Vector& direction) {
114 position_ = position;
115 direction_ = direction;
116 }
117
Evaluate(double x,double * f,double * g)118 bool LineSearchFunction::Evaluate(double x, double* f, double* g) {
119 scaled_direction_ = x * direction_;
120 if (!evaluator_->Plus(position_.data(),
121 scaled_direction_.data(),
122 evaluation_point_.data())) {
123 return false;
124 }
125
126 if (g == NULL) {
127 return (evaluator_->Evaluate(evaluation_point_.data(),
128 f, NULL, NULL, NULL) &&
129 IsFinite(*f));
130 }
131
132 if (!evaluator_->Evaluate(evaluation_point_.data(),
133 f,
134 NULL,
135 gradient_.data(), NULL)) {
136 return false;
137 }
138
139 *g = direction_.dot(gradient_);
140 return IsFinite(*f) && IsFinite(*g);
141 }
142
DirectionInfinityNorm() const143 double LineSearchFunction::DirectionInfinityNorm() const {
144 return direction_.lpNorm<Eigen::Infinity>();
145 }
146
147 // Returns step_size \in [min_step_size, max_step_size] which minimizes the
148 // polynomial of degree defined by interpolation_type which interpolates all
149 // of the provided samples with valid values.
InterpolatingPolynomialMinimizingStepSize(const LineSearchInterpolationType & interpolation_type,const FunctionSample & lowerbound,const FunctionSample & previous,const FunctionSample & current,const double min_step_size,const double max_step_size) const150 double LineSearch::InterpolatingPolynomialMinimizingStepSize(
151 const LineSearchInterpolationType& interpolation_type,
152 const FunctionSample& lowerbound,
153 const FunctionSample& previous,
154 const FunctionSample& current,
155 const double min_step_size,
156 const double max_step_size) const {
157 if (!current.value_is_valid ||
158 (interpolation_type == BISECTION &&
159 max_step_size <= current.x)) {
160 // Either: sample is invalid; or we are using BISECTION and contracting
161 // the step size.
162 return min(max(current.x * 0.5, min_step_size), max_step_size);
163 } else if (interpolation_type == BISECTION) {
164 CHECK_GT(max_step_size, current.x);
165 // We are expanding the search (during a Wolfe bracketing phase) using
166 // BISECTION interpolation. Using BISECTION when trying to expand is
167 // strictly speaking an oxymoron, but we define this to mean always taking
168 // the maximum step size so that the Armijo & Wolfe implementations are
169 // agnostic to the interpolation type.
170 return max_step_size;
171 }
172 // Only check if lower-bound is valid here, where it is required
173 // to avoid replicating current.value_is_valid == false
174 // behaviour in WolfeLineSearch.
175 CHECK(lowerbound.value_is_valid)
176 << std::scientific << std::setprecision(kErrorMessageNumericPrecision)
177 << "Ceres bug: lower-bound sample for interpolation is invalid, "
178 << "please contact the developers!, interpolation_type: "
179 << LineSearchInterpolationTypeToString(interpolation_type)
180 << ", lowerbound: " << lowerbound << ", previous: " << previous
181 << ", current: " << current;
182
183 // Select step size by interpolating the function and gradient values
184 // and minimizing the corresponding polynomial.
185 vector<FunctionSample> samples;
186 samples.push_back(lowerbound);
187
188 if (interpolation_type == QUADRATIC) {
189 // Two point interpolation using function values and the
190 // gradient at the lower bound.
191 samples.push_back(ValueSample(current.x, current.value));
192
193 if (previous.value_is_valid) {
194 // Three point interpolation, using function values and the
195 // gradient at the lower bound.
196 samples.push_back(ValueSample(previous.x, previous.value));
197 }
198 } else if (interpolation_type == CUBIC) {
199 // Two point interpolation using the function values and the gradients.
200 samples.push_back(current);
201
202 if (previous.value_is_valid) {
203 // Three point interpolation using the function values and
204 // the gradients.
205 samples.push_back(previous);
206 }
207 } else {
208 LOG(FATAL) << "Ceres bug: No handler for interpolation_type: "
209 << LineSearchInterpolationTypeToString(interpolation_type)
210 << ", please contact the developers!";
211 }
212
213 double step_size = 0.0, unused_min_value = 0.0;
214 MinimizeInterpolatingPolynomial(samples, min_step_size, max_step_size,
215 &step_size, &unused_min_value);
216 return step_size;
217 }
218
ArmijoLineSearch(const LineSearch::Options & options)219 ArmijoLineSearch::ArmijoLineSearch(const LineSearch::Options& options)
220 : LineSearch(options) {}
221
Search(const double step_size_estimate,const double initial_cost,const double initial_gradient,Summary * summary)222 void ArmijoLineSearch::Search(const double step_size_estimate,
223 const double initial_cost,
224 const double initial_gradient,
225 Summary* summary) {
226 *CHECK_NOTNULL(summary) = LineSearch::Summary();
227 CHECK_GE(step_size_estimate, 0.0);
228 CHECK_GT(options().sufficient_decrease, 0.0);
229 CHECK_LT(options().sufficient_decrease, 1.0);
230 CHECK_GT(options().max_num_iterations, 0);
231 Function* function = options().function;
232
233 // Note initial_cost & initial_gradient are evaluated at step_size = 0,
234 // not step_size_estimate, which is our starting guess.
235 const FunctionSample initial_position =
236 ValueAndGradientSample(0.0, initial_cost, initial_gradient);
237
238 FunctionSample previous = ValueAndGradientSample(0.0, 0.0, 0.0);
239 previous.value_is_valid = false;
240
241 FunctionSample current = ValueAndGradientSample(step_size_estimate, 0.0, 0.0);
242 current.value_is_valid = false;
243
244 // As the Armijo line search algorithm always uses the initial point, for
245 // which both the function value and derivative are known, when fitting a
246 // minimizing polynomial, we can fit up to a quadratic without requiring the
247 // gradient at the current query point.
248 const bool interpolation_uses_gradient_at_current_sample =
249 options().interpolation_type == CUBIC;
250 const double descent_direction_max_norm =
251 static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm();
252
253 ++summary->num_function_evaluations;
254 if (interpolation_uses_gradient_at_current_sample) {
255 ++summary->num_gradient_evaluations;
256 }
257 current.value_is_valid =
258 function->Evaluate(current.x,
259 ¤t.value,
260 interpolation_uses_gradient_at_current_sample
261 ? ¤t.gradient : NULL);
262 current.gradient_is_valid =
263 interpolation_uses_gradient_at_current_sample && current.value_is_valid;
264 while (!current.value_is_valid ||
265 current.value > (initial_cost
266 + options().sufficient_decrease
267 * initial_gradient
268 * current.x)) {
269 // If current.value_is_valid is false, we treat it as if the cost at that
270 // point is not large enough to satisfy the sufficient decrease condition.
271 ++summary->num_iterations;
272 if (summary->num_iterations >= options().max_num_iterations) {
273 summary->error =
274 StringPrintf("Line search failed: Armijo failed to find a point "
275 "satisfying the sufficient decrease condition within "
276 "specified max_num_iterations: %d.",
277 options().max_num_iterations);
278 LOG_IF(WARNING, !options().is_silent) << summary->error;
279 return;
280 }
281
282 const double step_size =
283 this->InterpolatingPolynomialMinimizingStepSize(
284 options().interpolation_type,
285 initial_position,
286 previous,
287 current,
288 (options().max_step_contraction * current.x),
289 (options().min_step_contraction * current.x));
290
291 if (step_size * descent_direction_max_norm < options().min_step_size) {
292 summary->error =
293 StringPrintf("Line search failed: step_size too small: %.5e "
294 "with descent_direction_max_norm: %.5e.", step_size,
295 descent_direction_max_norm);
296 LOG_IF(WARNING, !options().is_silent) << summary->error;
297 return;
298 }
299
300 previous = current;
301 current.x = step_size;
302
303 ++summary->num_function_evaluations;
304 if (interpolation_uses_gradient_at_current_sample) {
305 ++summary->num_gradient_evaluations;
306 }
307 current.value_is_valid =
308 function->Evaluate(current.x,
309 ¤t.value,
310 interpolation_uses_gradient_at_current_sample
311 ? ¤t.gradient : NULL);
312 current.gradient_is_valid =
313 interpolation_uses_gradient_at_current_sample && current.value_is_valid;
314 }
315
316 summary->optimal_step_size = current.x;
317 summary->success = true;
318 }
319
WolfeLineSearch(const LineSearch::Options & options)320 WolfeLineSearch::WolfeLineSearch(const LineSearch::Options& options)
321 : LineSearch(options) {}
322
Search(const double step_size_estimate,const double initial_cost,const double initial_gradient,Summary * summary)323 void WolfeLineSearch::Search(const double step_size_estimate,
324 const double initial_cost,
325 const double initial_gradient,
326 Summary* summary) {
327 *CHECK_NOTNULL(summary) = LineSearch::Summary();
328 // All parameters should have been validated by the Solver, but as
329 // invalid values would produce crazy nonsense, hard check them here.
330 CHECK_GE(step_size_estimate, 0.0);
331 CHECK_GT(options().sufficient_decrease, 0.0);
332 CHECK_GT(options().sufficient_curvature_decrease,
333 options().sufficient_decrease);
334 CHECK_LT(options().sufficient_curvature_decrease, 1.0);
335 CHECK_GT(options().max_step_expansion, 1.0);
336
337 // Note initial_cost & initial_gradient are evaluated at step_size = 0,
338 // not step_size_estimate, which is our starting guess.
339 const FunctionSample initial_position =
340 ValueAndGradientSample(0.0, initial_cost, initial_gradient);
341
342 bool do_zoom_search = false;
343 // Important: The high/low in bracket_high & bracket_low refer to their
344 // _function_ values, not their step sizes i.e. it is _not_ required that
345 // bracket_low.x < bracket_high.x.
346 FunctionSample solution, bracket_low, bracket_high;
347
348 // Wolfe bracketing phase: Increases step_size until either it finds a point
349 // that satisfies the (strong) Wolfe conditions, or an interval that brackets
350 // step sizes which satisfy the conditions. From Nocedal & Wright [1] p61 the
351 // interval: (step_size_{k-1}, step_size_{k}) contains step lengths satisfying
352 // the strong Wolfe conditions if one of the following conditions are met:
353 //
354 // 1. step_size_{k} violates the sufficient decrease (Armijo) condition.
355 // 2. f(step_size_{k}) >= f(step_size_{k-1}).
356 // 3. f'(step_size_{k}) >= 0.
357 //
358 // Caveat: If f(step_size_{k}) is invalid, then step_size is reduced, ignoring
359 // this special case, step_size monotonically increases during bracketing.
360 if (!this->BracketingPhase(initial_position,
361 step_size_estimate,
362 &bracket_low,
363 &bracket_high,
364 &do_zoom_search,
365 summary)) {
366 // Failed to find either a valid point, a valid bracket satisfying the Wolfe
367 // conditions, or even a step size > minimum tolerance satisfying the Armijo
368 // condition.
369 return;
370 }
371
372 if (!do_zoom_search) {
373 // Either: Bracketing phase already found a point satisfying the strong
374 // Wolfe conditions, thus no Zoom required.
375 //
376 // Or: Bracketing failed to find a valid bracket or a point satisfying the
377 // strong Wolfe conditions within max_num_iterations, or whilst searching
378 // shrank the bracket width until it was below our minimum tolerance.
379 // As these are 'artificial' constraints, and we would otherwise fail to
380 // produce a valid point when ArmijoLineSearch would succeed, we return the
381 // point with the lowest cost found thus far which satsifies the Armijo
382 // condition (but not the Wolfe conditions).
383 summary->optimal_step_size = bracket_low.x;
384 summary->success = true;
385 return;
386 }
387
388 VLOG(3) << std::scientific << std::setprecision(kErrorMessageNumericPrecision)
389 << "Starting line search zoom phase with bracket_low: "
390 << bracket_low << ", bracket_high: " << bracket_high
391 << ", bracket width: " << fabs(bracket_low.x - bracket_high.x)
392 << ", bracket abs delta cost: "
393 << fabs(bracket_low.value - bracket_high.value);
394
395 // Wolfe Zoom phase: Called when the Bracketing phase finds an interval of
396 // non-zero, finite width that should bracket step sizes which satisfy the
397 // (strong) Wolfe conditions (before finding a step size that satisfies the
398 // conditions). Zoom successively decreases the size of the interval until a
399 // step size which satisfies the Wolfe conditions is found. The interval is
400 // defined by bracket_low & bracket_high, which satisfy:
401 //
402 // 1. The interval bounded by step sizes: bracket_low.x & bracket_high.x
403 // contains step sizes that satsify the strong Wolfe conditions.
404 // 2. bracket_low.x is of all the step sizes evaluated *which satisifed the
405 // Armijo sufficient decrease condition*, the one which generated the
406 // smallest function value, i.e. bracket_low.value <
407 // f(all other steps satisfying Armijo).
408 // - Note that this does _not_ (necessarily) mean that initially
409 // bracket_low.value < bracket_high.value (although this is typical)
410 // e.g. when bracket_low = initial_position, and bracket_high is the
411 // first sample, and which does not satisfy the Armijo condition,
412 // but still has bracket_high.value < initial_position.value.
413 // 3. bracket_high is chosen after bracket_low, s.t.
414 // bracket_low.gradient * (bracket_high.x - bracket_low.x) < 0.
415 if (!this->ZoomPhase(initial_position,
416 bracket_low,
417 bracket_high,
418 &solution,
419 summary) && !solution.value_is_valid) {
420 // Failed to find a valid point (given the specified decrease parameters)
421 // within the specified bracket.
422 return;
423 }
424 // Ensure that if we ran out of iterations whilst zooming the bracket, or
425 // shrank the bracket width to < tolerance and failed to find a point which
426 // satisfies the strong Wolfe curvature condition, that we return the point
427 // amongst those found thus far, which minimizes f() and satisfies the Armijo
428 // condition.
429 solution =
430 solution.value_is_valid && solution.value <= bracket_low.value
431 ? solution : bracket_low;
432
433 summary->optimal_step_size = solution.x;
434 summary->success = true;
435 }
436
437 // Returns true if either:
438 //
439 // A termination condition satisfying the (strong) Wolfe bracketing conditions
440 // is found:
441 //
442 // - A valid point, defined as a bracket of zero width [zoom not required].
443 // - A valid bracket (of width > tolerance), [zoom required].
444 //
445 // Or, searching was stopped due to an 'artificial' constraint, i.e. not
446 // a condition imposed / required by the underlying algorithm, but instead an
447 // engineering / implementation consideration. But a step which exceeds the
448 // minimum step size, and satsifies the Armijo condition was still found,
449 // and should thus be used [zoom not required].
450 //
451 // Returns false if no step size > minimum step size was found which
452 // satisfies at least the Armijo condition.
BracketingPhase(const FunctionSample & initial_position,const double step_size_estimate,FunctionSample * bracket_low,FunctionSample * bracket_high,bool * do_zoom_search,Summary * summary)453 bool WolfeLineSearch::BracketingPhase(
454 const FunctionSample& initial_position,
455 const double step_size_estimate,
456 FunctionSample* bracket_low,
457 FunctionSample* bracket_high,
458 bool* do_zoom_search,
459 Summary* summary) {
460 Function* function = options().function;
461
462 FunctionSample previous = initial_position;
463 FunctionSample current = ValueAndGradientSample(step_size_estimate, 0.0, 0.0);
464 current.value_is_valid = false;
465
466 const double descent_direction_max_norm =
467 static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm();
468
469 *do_zoom_search = false;
470 *bracket_low = initial_position;
471
472 // As we require the gradient to evaluate the Wolfe condition, we always
473 // calculate it together with the value, irrespective of the interpolation
474 // type. As opposed to only calculating the gradient after the Armijo
475 // condition is satisifed, as the computational saving from this approach
476 // would be slight (perhaps even negative due to the extra call). Also,
477 // always calculating the value & gradient together protects against us
478 // reporting invalid solutions if the cost function returns slightly different
479 // function values when evaluated with / without gradients (due to numerical
480 // issues).
481 ++summary->num_function_evaluations;
482 ++summary->num_gradient_evaluations;
483 current.value_is_valid =
484 function->Evaluate(current.x,
485 ¤t.value,
486 ¤t.gradient);
487 current.gradient_is_valid = current.value_is_valid;
488
489 while (true) {
490 ++summary->num_iterations;
491
492 if (current.value_is_valid &&
493 (current.value > (initial_position.value
494 + options().sufficient_decrease
495 * initial_position.gradient
496 * current.x) ||
497 (previous.value_is_valid && current.value > previous.value))) {
498 // Bracket found: current step size violates Armijo sufficient decrease
499 // condition, or has stepped past an inflection point of f() relative to
500 // previous step size.
501 *do_zoom_search = true;
502 *bracket_low = previous;
503 *bracket_high = current;
504 VLOG(3) << std::scientific
505 << std::setprecision(kErrorMessageNumericPrecision)
506 << "Bracket found: current step (" << current.x
507 << ") violates Armijo sufficient condition, or has passed an "
508 << "inflection point of f() based on value.";
509 break;
510 }
511
512 if (current.value_is_valid &&
513 fabs(current.gradient) <=
514 -options().sufficient_curvature_decrease * initial_position.gradient) {
515 // Current step size satisfies the strong Wolfe conditions, and is thus a
516 // valid termination point, therefore a Zoom not required.
517 *bracket_low = current;
518 *bracket_high = current;
519 VLOG(3) << std::scientific
520 << std::setprecision(kErrorMessageNumericPrecision)
521 << "Bracketing phase found step size: " << current.x
522 << ", satisfying strong Wolfe conditions, initial_position: "
523 << initial_position << ", current: " << current;
524 break;
525
526 } else if (current.value_is_valid && current.gradient >= 0) {
527 // Bracket found: current step size has stepped past an inflection point
528 // of f(), but Armijo sufficient decrease is still satisfied and
529 // f(current) is our best minimum thus far. Remember step size
530 // monotonically increases, thus previous_step_size < current_step_size
531 // even though f(previous) > f(current).
532 *do_zoom_search = true;
533 // Note inverse ordering from first bracket case.
534 *bracket_low = current;
535 *bracket_high = previous;
536 VLOG(3) << "Bracket found: current step (" << current.x
537 << ") satisfies Armijo, but has gradient >= 0, thus have passed "
538 << "an inflection point of f().";
539 break;
540
541 } else if (current.value_is_valid &&
542 fabs(current.x - previous.x) * descent_direction_max_norm
543 < options().min_step_size) {
544 // We have shrunk the search bracket to a width less than our tolerance,
545 // and still not found either a point satisfying the strong Wolfe
546 // conditions, or a valid bracket containing such a point. Stop searching
547 // and set bracket_low to the size size amongst all those tested which
548 // minimizes f() and satisfies the Armijo condition.
549 LOG_IF(WARNING, !options().is_silent)
550 << "Line search failed: Wolfe bracketing phase shrank "
551 << "bracket width: " << fabs(current.x - previous.x)
552 << ", to < tolerance: " << options().min_step_size
553 << ", with descent_direction_max_norm: "
554 << descent_direction_max_norm << ", and failed to find "
555 << "a point satisfying the strong Wolfe conditions or a "
556 << "bracketing containing such a point. Accepting "
557 << "point found satisfying Armijo condition only, to "
558 << "allow continuation.";
559 *bracket_low = current;
560 break;
561
562 } else if (summary->num_iterations >= options().max_num_iterations) {
563 // Check num iterations bound here so that we always evaluate the
564 // max_num_iterations-th iteration against all conditions, and
565 // then perform no additional (unused) evaluations.
566 summary->error =
567 StringPrintf("Line search failed: Wolfe bracketing phase failed to "
568 "find a point satisfying strong Wolfe conditions, or a "
569 "bracket containing such a point within specified "
570 "max_num_iterations: %d", options().max_num_iterations);
571 LOG_IF(WARNING, !options().is_silent) << summary->error;
572 // Ensure that bracket_low is always set to the step size amongst all
573 // those tested which minimizes f() and satisfies the Armijo condition
574 // when we terminate due to the 'artificial' max_num_iterations condition.
575 *bracket_low =
576 current.value_is_valid && current.value < bracket_low->value
577 ? current : *bracket_low;
578 break;
579 }
580 // Either: f(current) is invalid; or, f(current) is valid, but does not
581 // satisfy the strong Wolfe conditions itself, or the conditions for
582 // being a boundary of a bracket.
583
584 // If f(current) is valid, (but meets no criteria) expand the search by
585 // increasing the step size.
586 const double max_step_size =
587 current.value_is_valid
588 ? (current.x * options().max_step_expansion) : current.x;
589
590 // We are performing 2-point interpolation only here, but the API of
591 // InterpolatingPolynomialMinimizingStepSize() allows for up to
592 // 3-point interpolation, so pad call with a sample with an invalid
593 // value that will therefore be ignored.
594 const FunctionSample unused_previous;
595 DCHECK(!unused_previous.value_is_valid);
596 // Contracts step size if f(current) is not valid.
597 const double step_size =
598 this->InterpolatingPolynomialMinimizingStepSize(
599 options().interpolation_type,
600 previous,
601 unused_previous,
602 current,
603 previous.x,
604 max_step_size);
605 if (step_size * descent_direction_max_norm < options().min_step_size) {
606 summary->error =
607 StringPrintf("Line search failed: step_size too small: %.5e "
608 "with descent_direction_max_norm: %.5e", step_size,
609 descent_direction_max_norm);
610 LOG_IF(WARNING, !options().is_silent) << summary->error;
611 return false;
612 }
613
614 previous = current.value_is_valid ? current : previous;
615 current.x = step_size;
616
617 ++summary->num_function_evaluations;
618 ++summary->num_gradient_evaluations;
619 current.value_is_valid =
620 function->Evaluate(current.x,
621 ¤t.value,
622 ¤t.gradient);
623 current.gradient_is_valid = current.value_is_valid;
624 }
625
626 // Ensure that even if a valid bracket was found, we will only mark a zoom
627 // as required if the bracket's width is greater than our minimum tolerance.
628 if (*do_zoom_search &&
629 fabs(bracket_high->x - bracket_low->x) * descent_direction_max_norm
630 < options().min_step_size) {
631 *do_zoom_search = false;
632 }
633
634 return true;
635 }
636
637 // Returns true iff solution satisfies the strong Wolfe conditions. Otherwise,
638 // on return false, if we stopped searching due to the 'artificial' condition of
639 // reaching max_num_iterations, solution is the step size amongst all those
640 // tested, which satisfied the Armijo decrease condition and minimized f().
ZoomPhase(const FunctionSample & initial_position,FunctionSample bracket_low,FunctionSample bracket_high,FunctionSample * solution,Summary * summary)641 bool WolfeLineSearch::ZoomPhase(const FunctionSample& initial_position,
642 FunctionSample bracket_low,
643 FunctionSample bracket_high,
644 FunctionSample* solution,
645 Summary* summary) {
646 Function* function = options().function;
647
648 CHECK(bracket_low.value_is_valid && bracket_low.gradient_is_valid)
649 << std::scientific << std::setprecision(kErrorMessageNumericPrecision)
650 << "Ceres bug: f_low input to Wolfe Zoom invalid, please contact "
651 << "the developers!, initial_position: " << initial_position
652 << ", bracket_low: " << bracket_low
653 << ", bracket_high: "<< bracket_high;
654 // We do not require bracket_high.gradient_is_valid as the gradient condition
655 // for a valid bracket is only dependent upon bracket_low.gradient, and
656 // in order to minimize jacobian evaluations, bracket_high.gradient may
657 // not have been calculated (if bracket_high.value does not satisfy the
658 // Armijo sufficient decrease condition and interpolation method does not
659 // require it).
660 //
661 // We also do not require that: bracket_low.value < bracket_high.value,
662 // although this is typical. This is to deal with the case when
663 // bracket_low = initial_position, bracket_high is the first sample,
664 // and bracket_high does not satisfy the Armijo condition, but still has
665 // bracket_high.value < initial_position.value.
666 CHECK(bracket_high.value_is_valid)
667 << std::scientific << std::setprecision(kErrorMessageNumericPrecision)
668 << "Ceres bug: f_high input to Wolfe Zoom invalid, please "
669 << "contact the developers!, initial_position: " << initial_position
670 << ", bracket_low: " << bracket_low
671 << ", bracket_high: "<< bracket_high;
672
673 if (bracket_low.gradient * (bracket_high.x - bracket_low.x) >= 0) {
674 // The third condition for a valid initial bracket:
675 //
676 // 3. bracket_high is chosen after bracket_low, s.t.
677 // bracket_low.gradient * (bracket_high.x - bracket_low.x) < 0.
678 //
679 // is not satisfied. As this can happen when the users' cost function
680 // returns inconsistent gradient values relative to the function values,
681 // we do not CHECK_LT(), but we do stop processing and return an invalid
682 // value.
683 summary->error =
684 StringPrintf("Line search failed: Wolfe zoom phase passed a bracket "
685 "which does not satisfy: bracket_low.gradient * "
686 "(bracket_high.x - bracket_low.x) < 0 [%.8e !< 0] "
687 "with initial_position: %s, bracket_low: %s, bracket_high:"
688 " %s, the most likely cause of which is the cost function "
689 "returning inconsistent gradient & function values.",
690 bracket_low.gradient * (bracket_high.x - bracket_low.x),
691 initial_position.ToDebugString().c_str(),
692 bracket_low.ToDebugString().c_str(),
693 bracket_high.ToDebugString().c_str());
694 LOG_IF(WARNING, !options().is_silent) << summary->error;
695 solution->value_is_valid = false;
696 return false;
697 }
698
699 const int num_bracketing_iterations = summary->num_iterations;
700 const double descent_direction_max_norm =
701 static_cast<const LineSearchFunction*>(function)->DirectionInfinityNorm();
702
703 while (true) {
704 // Set solution to bracket_low, as it is our best step size (smallest f())
705 // found thus far and satisfies the Armijo condition, even though it does
706 // not satisfy the Wolfe condition.
707 *solution = bracket_low;
708 if (summary->num_iterations >= options().max_num_iterations) {
709 summary->error =
710 StringPrintf("Line search failed: Wolfe zoom phase failed to "
711 "find a point satisfying strong Wolfe conditions "
712 "within specified max_num_iterations: %d, "
713 "(num iterations taken for bracketing: %d).",
714 options().max_num_iterations, num_bracketing_iterations);
715 LOG_IF(WARNING, !options().is_silent) << summary->error;
716 return false;
717 }
718 if (fabs(bracket_high.x - bracket_low.x) * descent_direction_max_norm
719 < options().min_step_size) {
720 // Bracket width has been reduced below tolerance, and no point satisfying
721 // the strong Wolfe conditions has been found.
722 summary->error =
723 StringPrintf("Line search failed: Wolfe zoom bracket width: %.5e "
724 "too small with descent_direction_max_norm: %.5e.",
725 fabs(bracket_high.x - bracket_low.x),
726 descent_direction_max_norm);
727 LOG_IF(WARNING, !options().is_silent) << summary->error;
728 return false;
729 }
730
731 ++summary->num_iterations;
732 // Polynomial interpolation requires inputs ordered according to step size,
733 // not f(step size).
734 const FunctionSample& lower_bound_step =
735 bracket_low.x < bracket_high.x ? bracket_low : bracket_high;
736 const FunctionSample& upper_bound_step =
737 bracket_low.x < bracket_high.x ? bracket_high : bracket_low;
738 // We are performing 2-point interpolation only here, but the API of
739 // InterpolatingPolynomialMinimizingStepSize() allows for up to
740 // 3-point interpolation, so pad call with a sample with an invalid
741 // value that will therefore be ignored.
742 const FunctionSample unused_previous;
743 DCHECK(!unused_previous.value_is_valid);
744 solution->x =
745 this->InterpolatingPolynomialMinimizingStepSize(
746 options().interpolation_type,
747 lower_bound_step,
748 unused_previous,
749 upper_bound_step,
750 lower_bound_step.x,
751 upper_bound_step.x);
752 // No check on magnitude of step size being too small here as it is
753 // lower-bounded by the initial bracket start point, which was valid.
754 //
755 // As we require the gradient to evaluate the Wolfe condition, we always
756 // calculate it together with the value, irrespective of the interpolation
757 // type. As opposed to only calculating the gradient after the Armijo
758 // condition is satisfied, as the computational saving from this approach
759 // would be slight (perhaps even negative due to the extra call). Also,
760 // always calculating the value & gradient together protects against us
761 // reporting invalid solutions if the cost function returns slightly
762 // different function values when evaluated with / without gradients (due
763 // to numerical issues).
764 ++summary->num_function_evaluations;
765 ++summary->num_gradient_evaluations;
766 solution->value_is_valid =
767 function->Evaluate(solution->x,
768 &solution->value,
769 &solution->gradient);
770 solution->gradient_is_valid = solution->value_is_valid;
771 if (!solution->value_is_valid) {
772 summary->error =
773 StringPrintf("Line search failed: Wolfe Zoom phase found "
774 "step_size: %.5e, for which function is invalid, "
775 "between low_step: %.5e and high_step: %.5e "
776 "at which function is valid.",
777 solution->x, bracket_low.x, bracket_high.x);
778 LOG_IF(WARNING, !options().is_silent) << summary->error;
779 return false;
780 }
781
782 VLOG(3) << "Zoom iteration: "
783 << summary->num_iterations - num_bracketing_iterations
784 << ", bracket_low: " << bracket_low
785 << ", bracket_high: " << bracket_high
786 << ", minimizing solution: " << *solution;
787
788 if ((solution->value > (initial_position.value
789 + options().sufficient_decrease
790 * initial_position.gradient
791 * solution->x)) ||
792 (solution->value >= bracket_low.value)) {
793 // Armijo sufficient decrease not satisfied, or not better
794 // than current lowest sample, use as new upper bound.
795 bracket_high = *solution;
796 continue;
797 }
798
799 // Armijo sufficient decrease satisfied, check strong Wolfe condition.
800 if (fabs(solution->gradient) <=
801 -options().sufficient_curvature_decrease * initial_position.gradient) {
802 // Found a valid termination point satisfying strong Wolfe conditions.
803 VLOG(3) << std::scientific
804 << std::setprecision(kErrorMessageNumericPrecision)
805 << "Zoom phase found step size: " << solution->x
806 << ", satisfying strong Wolfe conditions.";
807 break;
808
809 } else if (solution->gradient * (bracket_high.x - bracket_low.x) >= 0) {
810 bracket_high = bracket_low;
811 }
812
813 bracket_low = *solution;
814 }
815 // Solution contains a valid point which satisfies the strong Wolfe
816 // conditions.
817 return true;
818 }
819
820 } // namespace internal
821 } // namespace ceres
822