// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: keir@google.com (Keir Mierle)
//
// The ProgramEvaluator runs the cost functions contained in each residual block
// and stores the result into a jacobian. The particular type of jacobian is
// abstracted out using two template parameters:
//
//   - An "EvaluatePreparer" that is responsible for creating the array with
//     pointers to the jacobian blocks where the cost function evaluates to.
//   - A "JacobianWriter" that is responsible for storing the resulting
//     jacobian blocks in the passed sparse matrix.
//
// This abstraction affords an efficient evaluator implementation while still
// supporting writing to multiple sparse matrix formats. For example, when the
// ProgramEvaluator is parameterized for writing to block sparse matrices, the
// residual jacobians are written directly into their final position in the
// block sparse matrix by the user's CostFunction; there is no copying.
//
// The evaluation is threaded with OpenMP.
//
// The EvaluatePreparer and JacobianWriter interfaces are as follows:
//
//   class EvaluatePreparer {
//     // Prepare the jacobians array for use as the destination of a call to
//     // a cost function's evaluate method.
//     void Prepare(const ResidualBlock* residual_block,
//                  int residual_block_index,
//                  SparseMatrix* jacobian,
//                  double** jacobians);
//   }
//
//   class JacobianWriter {
//     // Create a jacobian that this writer can write. Same as
//     // Evaluator::CreateJacobian.
//     SparseMatrix* CreateJacobian() const;
//
//     // Create num_threads evaluate preparers. Caller owns result which must
//     // be freed with delete[]. Resulting preparers are valid while *this is.
//     EvaluatePreparer* CreateEvaluatePreparers(int num_threads);
//
//     // Write the block jacobians from a residual block evaluation to the
//     // larger sparse jacobian.
//     void Write(int residual_id,
//                int residual_offset,
//                double** jacobians,
//                SparseMatrix* jacobian);
//   }
//
// Note: The ProgramEvaluator is not thread safe, since internally it maintains
// some per-thread scratch space.
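//
// For example, an evaluator writing to a dense jacobian could be instantiated
// roughly as shown below. This is only a sketch: ScratchEvaluatePreparer and
// DenseJacobianWriter are the preparer/writer pair assumed here (the types
// used for the dense path in Evaluator::Create), and `options` and `program`
// stand in for a configured Evaluator::Options and the Program to evaluate.
//
//   Evaluator* evaluator =
//       new ProgramEvaluator<ScratchEvaluatePreparer,
//                            DenseJacobianWriter>(options, program);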

#ifndef CERES_INTERNAL_PROGRAM_EVALUATOR_H_
#define CERES_INTERNAL_PROGRAM_EVALUATOR_H_

// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/port.h"

#ifdef CERES_USE_OPENMP
#include <omp.h>
#endif

#include <map>
#include <string>
#include <vector>
#include "ceres/execution_summary.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"
#include "ceres/parameter_block.h"
#include "ceres/program.h"
#include "ceres/residual_block.h"
#include "ceres/small_blas.h"

namespace ceres {
namespace internal {

struct NullJacobianFinalizer {
  void operator()(SparseMatrix* jacobian, int num_parameters) {}
};

template<typename EvaluatePreparer,
         typename JacobianWriter,
         typename JacobianFinalizer = NullJacobianFinalizer>
class ProgramEvaluator : public Evaluator {
 public:
  ProgramEvaluator(const Evaluator::Options &options, Program* program)
      : options_(options),
        program_(program),
        jacobian_writer_(options, program),
        evaluate_preparers_(
            jacobian_writer_.CreateEvaluatePreparers(options.num_threads)) {
#ifndef CERES_USE_OPENMP
    CHECK_EQ(1, options_.num_threads)
        << "OpenMP support is not compiled into this binary; "
        << "only options.num_threads=1 is supported.";
#endif

    BuildResidualLayout(*program, &residual_layout_);
    evaluate_scratch_.reset(CreateEvaluatorScratch(*program,
                                                   options.num_threads));
  }

  // Implementation of Evaluator interface.
  SparseMatrix* CreateJacobian() const {
    return jacobian_writer_.CreateJacobian();
  }

  bool Evaluate(const Evaluator::EvaluateOptions& evaluate_options,
                const double* state,
                double* cost,
                double* residuals,
                double* gradient,
                SparseMatrix* jacobian) {
    ScopedExecutionTimer total_timer("Evaluator::Total", &execution_summary_);
    ScopedExecutionTimer call_type_timer(gradient == NULL && jacobian == NULL
                                         ? "Evaluator::Residual"
                                         : "Evaluator::Jacobian",
                                         &execution_summary_);

    // The parameters are stateful, so set the state before evaluating.
    if (!program_->StateVectorToParameterBlocks(state)) {
      return false;
    }

    if (residuals != NULL) {
      VectorRef(residuals, program_->NumResiduals()).setZero();
    }

    if (jacobian != NULL) {
      jacobian->SetZero();
    }

    // Each thread gets its own cost and evaluate scratch space.
    for (int i = 0; i < options_.num_threads; ++i) {
      evaluate_scratch_[i].cost = 0.0;
      if (gradient != NULL) {
        VectorRef(evaluate_scratch_[i].gradient.get(),
                  program_->NumEffectiveParameters()).setZero();
      }
    }

    // This bool is used to disable the loop if an error is encountered
    // without breaking out of it. The remaining loop iterations are still run,
    // but with an empty body, and so will finish quickly.
    bool abort = false;
    int num_residual_blocks = program_->NumResidualBlocks();
#pragma omp parallel for num_threads(options_.num_threads)
    for (int i = 0; i < num_residual_blocks; ++i) {
// Disable the loop instead of breaking, as required by OpenMP.
#pragma omp flush(abort)
      if (abort) {
        continue;
      }

#ifdef CERES_USE_OPENMP
      int thread_id = omp_get_thread_num();
#else
      int thread_id = 0;
#endif
      EvaluatePreparer* preparer = &evaluate_preparers_[thread_id];
      EvaluateScratch* scratch = &evaluate_scratch_[thread_id];

      // Prepare block residuals if requested.
      const ResidualBlock* residual_block = program_->residual_blocks()[i];
      double* block_residuals = NULL;
      if (residuals != NULL) {
        block_residuals = residuals + residual_layout_[i];
      } else if (gradient != NULL) {
        block_residuals = scratch->residual_block_residuals.get();
      }

      // Prepare block jacobians if requested.
      double** block_jacobians = NULL;
      if (jacobian != NULL || gradient != NULL) {
        preparer->Prepare(residual_block,
                          i,
                          jacobian,
                          scratch->jacobian_block_ptrs.get());
        block_jacobians = scratch->jacobian_block_ptrs.get();
      }

      // Evaluate the cost, residuals, and jacobians.
      double block_cost;
      if (!residual_block->Evaluate(
              evaluate_options.apply_loss_function,
              &block_cost,
              block_residuals,
              block_jacobians,
              scratch->residual_block_evaluate_scratch.get())) {
        abort = true;
// This ensures that the OpenMP threads have a consistent view of 'abort'. Do
// the flush inside the failure case so that there is usually only one
// synchronization point per loop iteration instead of two.
#pragma omp flush(abort)
        continue;
      }

      scratch->cost += block_cost;

      // Store the jacobians, if they were requested.
      if (jacobian != NULL) {
        jacobian_writer_.Write(i,
                               residual_layout_[i],
                               block_jacobians,
                               jacobian);
      }

      // Compute and store the gradient, if it was requested.
      if (gradient != NULL) {
        int num_residuals = residual_block->NumResiduals();
        int num_parameter_blocks = residual_block->NumParameterBlocks();
        for (int j = 0; j < num_parameter_blocks; ++j) {
          const ParameterBlock* parameter_block =
              residual_block->parameter_blocks()[j];
          if (parameter_block->IsConstant()) {
            continue;
          }

          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              block_jacobians[j],
              num_residuals,
              parameter_block->LocalSize(),
              block_residuals,
              scratch->gradient.get() + parameter_block->delta_offset());
        }
      }
    }

    if (!abort) {
      const int num_parameters = program_->NumEffectiveParameters();

      // Sum the cost and gradient (if requested) from each thread.
      (*cost) = 0.0;
      if (gradient != NULL) {
        VectorRef(gradient, num_parameters).setZero();
      }
      for (int i = 0; i < options_.num_threads; ++i) {
        (*cost) += evaluate_scratch_[i].cost;
        if (gradient != NULL) {
          VectorRef(gradient, num_parameters) +=
              VectorRef(evaluate_scratch_[i].gradient.get(), num_parameters);
        }
      }

      // Finalize the Jacobian if it is available.
      // `num_parameters` is passed to the finalizer so that storage can be
      // reserved for additional diagonal elements if necessary.
      if (jacobian != NULL) {
        JacobianFinalizer f;
        f(jacobian, num_parameters);
      }
    }
    return !abort;
  }

  bool Plus(const double* state,
            const double* delta,
            double* state_plus_delta) const {
    return program_->Plus(state, delta, state_plus_delta);
  }

  int NumParameters() const {
    return program_->NumParameters();
  }
  int NumEffectiveParameters() const {
    return program_->NumEffectiveParameters();
  }

  int NumResiduals() const {
    return program_->NumResiduals();
  }

  virtual map<string, int> CallStatistics() const {
    return execution_summary_.calls();
  }

  virtual map<string, double> TimeStatistics() const {
    return execution_summary_.times();
  }

 private:
  // Per-thread scratch space needed to evaluate and store each residual block.
  struct EvaluateScratch {
    void Init(int max_parameters_per_residual_block,
              int max_scratch_doubles_needed_for_evaluate,
              int max_residuals_per_residual_block,
              int num_parameters) {
      residual_block_evaluate_scratch.reset(
          new double[max_scratch_doubles_needed_for_evaluate]);
      gradient.reset(new double[num_parameters]);
      VectorRef(gradient.get(), num_parameters).setZero();
      residual_block_residuals.reset(
          new double[max_residuals_per_residual_block]);
      jacobian_block_ptrs.reset(
          new double*[max_parameters_per_residual_block]);
    }

    double cost;
    scoped_array<double> residual_block_evaluate_scratch;
    // The gradient in the local parameterization.
    scoped_array<double> gradient;
    // Enough space to store the residual for the largest residual block.
    scoped_array<double> residual_block_residuals;
    scoped_array<double*> jacobian_block_ptrs;
  };

  static void BuildResidualLayout(const Program& program,
                                  vector<int>* residual_layout) {
    const vector<ResidualBlock*>& residual_blocks = program.residual_blocks();
    residual_layout->resize(program.NumResidualBlocks());
    int residual_pos = 0;
    for (int i = 0; i < residual_blocks.size(); ++i) {
      const int num_residuals = residual_blocks[i]->NumResiduals();
      (*residual_layout)[i] = residual_pos;
      residual_pos += num_residuals;
    }
  }

  // Create scratch space for each thread evaluating the program.
  static EvaluateScratch* CreateEvaluatorScratch(const Program& program,
                                                 int num_threads) {
    int max_parameters_per_residual_block =
        program.MaxParametersPerResidualBlock();
    int max_scratch_doubles_needed_for_evaluate =
        program.MaxScratchDoublesNeededForEvaluate();
    int max_residuals_per_residual_block =
        program.MaxResidualsPerResidualBlock();
    int num_parameters = program.NumEffectiveParameters();

    EvaluateScratch* evaluate_scratch = new EvaluateScratch[num_threads];
    for (int i = 0; i < num_threads; i++) {
      evaluate_scratch[i].Init(max_parameters_per_residual_block,
                               max_scratch_doubles_needed_for_evaluate,
                               max_residuals_per_residual_block,
                               num_parameters);
    }
    return evaluate_scratch;
  }

  Evaluator::Options options_;
  Program* program_;
  JacobianWriter jacobian_writer_;
  scoped_array<EvaluatePreparer> evaluate_preparers_;
  scoped_array<EvaluateScratch> evaluate_scratch_;
  vector<int> residual_layout_;
  ::ceres::internal::ExecutionSummary execution_summary_;
};

}  // namespace internal
}  // namespace ceres

#endif  // CERES_INTERNAL_PROGRAM_EVALUATOR_H_