1 // Ceres Solver - A fast non-linear least squares minimizer 2 // Copyright 2013 Google Inc. All rights reserved. 3 // http://code.google.com/p/ceres-solver/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are met: 7 // 8 // * Redistributions of source code must retain the above copyright notice, 9 // this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above copyright notice, 11 // this list of conditions and the following disclaimer in the documentation 12 // and/or other materials provided with the distribution. 13 // * Neither the name of Google Inc. nor the names of its contributors may be 14 // used to endorse or promote products derived from this software without 15 // specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 // POSSIBILITY OF SUCH DAMAGE. 28 // 29 // Author: sameeragarwal@google.com (Sameer Agarwal) 30 // mierle@gmail.com (Keir Mierle) 31 // 32 // This autodiff implementation differs from the one found in 33 // autodiff_cost_function.h by supporting autodiff on cost functions 34 // with variable numbers of parameters with variable sizes. With the 35 // other implementation, all the sizes (both the number of parameter 36 // blocks and the size of each block) must be fixed at compile time. 37 // 38 // The functor API differs slightly from the API for fixed size 39 // autodiff; the expected interface for the cost functors is: 40 // 41 // struct MyCostFunctor { 42 // template<typename T> 43 // bool operator()(T const* const* parameters, T* residuals) const { 44 // // Use parameters[i] to access the i'th parameter block. 45 // } 46 // } 47 // 48 // Since the sizing of the parameters is done at runtime, you must 49 // also specify the sizes after creating the dynamic autodiff cost 50 // function. For example: 51 // 52 // DynamicAutoDiffCostFunction<MyCostFunctor, 3> cost_function( 53 // new MyCostFunctor()); 54 // cost_function.AddParameterBlock(5); 55 // cost_function.AddParameterBlock(10); 56 // cost_function.SetNumResiduals(21); 57 // 58 // Under the hood, the implementation evaluates the cost function 59 // multiple times, computing a small set of the derivatives (four by 60 // default, controlled by the Stride template parameter) with each 61 // pass. There is a tradeoff with the size of the passes; you may want 62 // to experiment with the stride. 63 64 #ifndef CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 65 #define CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 66 67 #include <cmath> 68 #include <numeric> 69 #include <vector> 70 71 #include "ceres/cost_function.h" 72 #include "ceres/internal/scoped_ptr.h" 73 #include "ceres/jet.h" 74 #include "glog/logging.h" 75 76 namespace ceres { 77 78 template <typename CostFunctor, int Stride = 4> 79 class DynamicAutoDiffCostFunction : public CostFunction { 80 public: DynamicAutoDiffCostFunction(CostFunctor * functor)81 explicit DynamicAutoDiffCostFunction(CostFunctor* functor) 82 : functor_(functor) {} 83 ~DynamicAutoDiffCostFunction()84 virtual ~DynamicAutoDiffCostFunction() {} 85 AddParameterBlock(int size)86 void AddParameterBlock(int size) { 87 mutable_parameter_block_sizes()->push_back(size); 88 } 89 SetNumResiduals(int num_residuals)90 void SetNumResiduals(int num_residuals) { 91 set_num_residuals(num_residuals); 92 } 93 Evaluate(double const * const * parameters,double * residuals,double ** jacobians)94 virtual bool Evaluate(double const* const* parameters, 95 double* residuals, 96 double** jacobians) const { 97 CHECK_GT(num_residuals(), 0) 98 << "You must call DynamicAutoDiffCostFunction::SetNumResiduals() " 99 << "before DynamicAutoDiffCostFunction::Evaluate()."; 100 101 if (jacobians == NULL) { 102 return (*functor_)(parameters, residuals); 103 } 104 105 // The difficulty with Jets, as implemented in Ceres, is that they were 106 // originally designed for strictly compile-sized use. At this point, there 107 // is a large body of code that assumes inside a cost functor it is 108 // acceptable to do e.g. T(1.5) and get an appropriately sized jet back. 109 // 110 // Unfortunately, it is impossible to communicate the expected size of a 111 // dynamically sized jet to the static instantiations that existing code 112 // depends on. 113 // 114 // To work around this issue, the solution here is to evaluate the 115 // jacobians in a series of passes, each one computing Stripe * 116 // num_residuals() derivatives. This is done with small, fixed-size jets. 117 const int num_parameter_blocks = parameter_block_sizes().size(); 118 const int num_parameters = std::accumulate(parameter_block_sizes().begin(), 119 parameter_block_sizes().end(), 120 0); 121 122 // Allocate scratch space for the strided evaluation. 123 vector<Jet<double, Stride> > input_jets(num_parameters); 124 vector<Jet<double, Stride> > output_jets(num_residuals()); 125 126 // Make the parameter pack that is sent to the functor (reused). 127 vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks, 128 static_cast<Jet<double, Stride>* >(NULL)); 129 int num_active_parameters = 0; 130 131 // To handle constant parameters between non-constant parameter blocks, the 132 // start position --- a raw parameter index --- of each contiguous block of 133 // non-constant parameters is recorded in start_derivative_section. 134 vector<int> start_derivative_section; 135 bool in_derivative_section = false; 136 int parameter_cursor = 0; 137 138 // Discover the derivative sections and set the parameter values. 139 for (int i = 0; i < num_parameter_blocks; ++i) { 140 jet_parameters[i] = &input_jets[parameter_cursor]; 141 142 const int parameter_block_size = parameter_block_sizes()[i]; 143 if (jacobians[i] != NULL) { 144 if (!in_derivative_section) { 145 start_derivative_section.push_back(parameter_cursor); 146 in_derivative_section = true; 147 } 148 149 num_active_parameters += parameter_block_size; 150 } else { 151 in_derivative_section = false; 152 } 153 154 for (int j = 0; j < parameter_block_size; ++j, parameter_cursor++) { 155 input_jets[parameter_cursor].a = parameters[i][j]; 156 } 157 } 158 159 // When `num_active_parameters % Stride != 0` then it can be the case 160 // that `active_parameter_count < Stride` while parameter_cursor is less 161 // than the total number of parameters and with no remaining non-constant 162 // parameter blocks. Pushing parameter_cursor (the total number of 163 // parameters) as a final entry to start_derivative_section is required 164 // because if a constant parameter block is encountered after the 165 // last non-constant block then current_derivative_section is incremented 166 // and would otherwise index an invalid position in 167 // start_derivative_section. Setting the final element to the total number 168 // of parameters means that this can only happen at most once in the loop 169 // below. 170 start_derivative_section.push_back(parameter_cursor); 171 172 // Evaluate all of the strides. Each stride is a chunk of the derivative to 173 // evaluate, typically some size proportional to the size of the SIMD 174 // registers of the CPU. 175 int num_strides = static_cast<int>(ceil(num_active_parameters / 176 static_cast<float>(Stride))); 177 178 int current_derivative_section = 0; 179 int current_derivative_section_cursor = 0; 180 181 for (int pass = 0; pass < num_strides; ++pass) { 182 // Set most of the jet components to zero, except for 183 // non-constant #Stride parameters. 184 const int initial_derivative_section = current_derivative_section; 185 const int initial_derivative_section_cursor = 186 current_derivative_section_cursor; 187 188 int active_parameter_count = 0; 189 parameter_cursor = 0; 190 191 for (int i = 0; i < num_parameter_blocks; ++i) { 192 for (int j = 0; j < parameter_block_sizes()[i]; 193 ++j, parameter_cursor++) { 194 input_jets[parameter_cursor].v.setZero(); 195 if (active_parameter_count < Stride && 196 parameter_cursor >= ( 197 start_derivative_section[current_derivative_section] + 198 current_derivative_section_cursor)) { 199 if (jacobians[i] != NULL) { 200 input_jets[parameter_cursor].v[active_parameter_count] = 1.0; 201 ++active_parameter_count; 202 ++current_derivative_section_cursor; 203 } else { 204 ++current_derivative_section; 205 current_derivative_section_cursor = 0; 206 } 207 } 208 } 209 } 210 211 if (!(*functor_)(&jet_parameters[0], &output_jets[0])) { 212 return false; 213 } 214 215 // Copy the pieces of the jacobians into their final place. 216 active_parameter_count = 0; 217 218 current_derivative_section = initial_derivative_section; 219 current_derivative_section_cursor = initial_derivative_section_cursor; 220 221 for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) { 222 for (int j = 0; j < parameter_block_sizes()[i]; 223 ++j, parameter_cursor++) { 224 if (active_parameter_count < Stride && 225 parameter_cursor >= ( 226 start_derivative_section[current_derivative_section] + 227 current_derivative_section_cursor)) { 228 if (jacobians[i] != NULL) { 229 for (int k = 0; k < num_residuals(); ++k) { 230 jacobians[i][k * parameter_block_sizes()[i] + j] = 231 output_jets[k].v[active_parameter_count]; 232 } 233 ++active_parameter_count; 234 ++current_derivative_section_cursor; 235 } else { 236 ++current_derivative_section; 237 current_derivative_section_cursor = 0; 238 } 239 } 240 } 241 } 242 243 // Only copy the residuals over once (even though we compute them on 244 // every loop). 245 if (pass == num_strides - 1) { 246 for (int k = 0; k < num_residuals(); ++k) { 247 residuals[k] = output_jets[k].a; 248 } 249 } 250 } 251 return true; 252 } 253 254 private: 255 internal::scoped_ptr<CostFunctor> functor_; 256 }; 257 258 } // namespace ceres 259 260 #endif // CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_ 261