1 /*
2 * Library: lmfit (Levenberg-Marquardt least squares fitting)
3 *
4 * File: lmmin.c
5 *
6 * Contents: Levenberg-Marquardt minimization.
7 *
8 * Copyright: MINPACK authors, The University of Chikago (1980-1999)
9 * Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
10 *
11 * License: see ../COPYING (FreeBSD)
12 *
13 * Homepage: apps.jcns.fz-juelich.de/lmfit
14 */
15
16 #include <assert.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <math.h>
20 #include <float.h>
21 #include "lmmin.h"
22
23 #define MIN(a, b) (((a) <= (b)) ? (a) : (b))
24 #define MAX(a, b) (((a) >= (b)) ? (a) : (b))
25 #define SQR(x) (x) * (x)
26
27 /* Declare functions that do the heavy numerics.
28 Implementions are in this source file, below lmmin.
29 Dependences: lmmin calls lmpar, which calls qrfac and qrsolv. */
30 void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
31 const double* diag, const double* qtb, const double delta,
32 double* par, double* x, double* Sdiag, double* aux, double* xdi);
33 void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
34 double* Acnorm, double* W);
35 void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
36 const double* diag, const double* qtb, double* x,
37 double* Sdiag, double* W);
38
39 /******************************************************************************/
40 /* Numeric constants */
41 /******************************************************************************/
42
43 /* Set machine-dependent constants to values from float.h. */
44 #define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */
45 #define LM_DWARF DBL_MIN /* smallest nonzero number */
46 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
47 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
48 #define LM_USERTOL 30 * LM_MACHEP /* users are recommended to require this */
49
50 /* If the above values do not work, the following seem good for an x86:
51 LM_MACHEP .555e-16
52 LM_DWARF 9.9e-324
53 LM_SQRT_DWARF 1.e-160
54 LM_SQRT_GIANT 1.e150
55 LM_USER_TOL 1.e-14
56 The following values should work on any machine:
57 LM_MACHEP 1.2e-16
58 LM_DWARF 1.0e-38
59 LM_SQRT_DWARF 3.834e-20
60 LM_SQRT_GIANT 1.304e19
61 LM_USER_TOL 1.e-14
62 */
63
64 /* Predefined control parameter sets (msgfile=NULL means stdout). */
65 const lm_control_struct lm_control_double = {
66 LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL,
67 100., 100, 1, NULL, 0, -1, -1};
68 const lm_control_struct lm_control_float = {
69 1.e-7, 1.e-7, 1.e-7, 1.e-7,
70 100., 100, 1, NULL, 0, -1, -1};
71
72 /******************************************************************************/
73 /* Message texts (indexed by status.info) */
74 /******************************************************************************/
75
76 const char* lm_infmsg[] = {
77 "found zero (sum of squares below underflow limit)",
78 "converged (the relative error in the sum of squares is at most tol)",
79 "converged (the relative error of the parameter vector is at most tol)",
80 "converged (both errors are at most tol)",
81 "trapped (by degeneracy; increasing epsilon might help)",
82 "exhausted (number of function calls exceeding preset patience)",
83 "failed (ftol<tol: cannot reduce sum of squares any further)",
84 "failed (xtol<tol: cannot improve approximate solution any further)",
85 "failed (gtol<tol: cannot improve approximate solution any further)",
86 "crashed (not enough memory)",
87 "exploded (fatal coding error: improper input parameters)",
88 "stopped (break requested within function evaluation)",
89 "found nan (function value is not-a-number or infinite)"};
90
91 const char* lm_shortmsg[] = {
92 "found zero",
93 "converged (f)",
94 "converged (p)",
95 "converged (2)",
96 "degenerate",
97 "call limit",
98 "failed (f)",
99 "failed (p)",
100 "failed (o)",
101 "no memory",
102 "invalid input",
103 "user break",
104 "found nan"};
105
106 /******************************************************************************/
107 /* Monitoring auxiliaries. */
108 /******************************************************************************/
109
lm_print_pars(int nout,const double * par,FILE * fout)110 void lm_print_pars(int nout, const double* par, FILE* fout)
111 {
112 int i;
113 for (i = 0; i < nout; ++i)
114 fprintf(fout, " %16.9g", par[i]);
115 fprintf(fout, "\n");
116 }
117
118 /******************************************************************************/
119 /* lmmin (main minimization routine) */
120 /******************************************************************************/
121
lmmin(const int n,double * x,const int m,const void * data,void (* evaluate)(const double * par,const int m_dat,const void * data,double * fvec,int * userbreak),const lm_control_struct * C,lm_status_struct * S)122 void lmmin(const int n, double* x, const int m, const void* data,
123 void (*evaluate)(const double* par, const int m_dat,
124 const void* data, double* fvec, int* userbreak),
125 const lm_control_struct* C, lm_status_struct* S)
126 {
127 int j, i;
128 double actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step,
129 sum, temp, temp1, temp2, temp3;
130
131 /*** Initialize internal variables. ***/
132
133 int maxfev = C->patience * (n+1);
134
135 int inner_success; /* flag for loop control */
136 double lmpar = 0; /* Levenberg-Marquardt parameter */
137 double delta = 0;
138 double xnorm = 0;
139 double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */
140
141 int nout = C->n_maxpri == -1 ? n : MIN(C->n_maxpri, n);
142
143 /* Reinterpret C->msgfile=NULL as stdout (which is unavailable for
144 compile-time initialization of lm_control_double and similar). */
145 FILE* msgfile = C->msgfile ? C->msgfile : stdout;
146
147 /*** Default status info; must be set before first return statement. ***/
148
149 S->outcome = 0; /* status code */
150 S->userbreak = 0;
151 S->nfev = 0; /* function evaluation counter */
152
153 /*** Check input parameters for errors. ***/
154
155 if (n <= 0) {
156 fprintf(stderr, "lmmin: invalid number of parameters %i\n", n);
157 S->outcome = 10;
158 return;
159 }
160 if (m < n) {
161 fprintf(stderr, "lmmin: number of data points (%i) "
162 "smaller than number of parameters (%i)\n",
163 m, n);
164 S->outcome = 10;
165 return;
166 }
167 if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) {
168 fprintf(stderr,
169 "lmmin: negative tolerance (at least one of %g %g %g)\n",
170 C->ftol, C->xtol, C->gtol);
171 S->outcome = 10;
172 return;
173 }
174 if (maxfev <= 0) {
175 fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n",
176 maxfev);
177 S->outcome = 10;
178 return;
179 }
180 if (C->stepbound <= 0) {
181 fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound);
182 S->outcome = 10;
183 return;
184 }
185 if (C->scale_diag != 0 && C->scale_diag != 1) {
186 fprintf(stderr, "lmmin: logical variable scale_diag=%i, "
187 "should be 0 or 1\n",
188 C->scale_diag);
189 S->outcome = 10;
190 return;
191 }
192
193 /*** Allocate work space. ***/
194
195 /* Allocate total workspace with just one system call */
196 char* ws;
197 if ((ws = malloc((2*m + 5*n + m*n) * sizeof(double) +
198 n * sizeof(int))) == NULL) {
199 S->outcome = 9;
200 return;
201 }
202
203 /* Assign workspace segments. */
204 char* pws = ws;
205 double* fvec = (double*)pws;
206 pws += m * sizeof(double) / sizeof(char);
207 double* diag = (double*)pws;
208 pws += n * sizeof(double) / sizeof(char);
209 double* qtf = (double*)pws;
210 pws += n * sizeof(double) / sizeof(char);
211 double* fjac = (double*)pws;
212 pws += n * m * sizeof(double) / sizeof(char);
213 double* wa1 = (double*)pws;
214 pws += n * sizeof(double) / sizeof(char);
215 double* wa2 = (double*)pws;
216 pws += n * sizeof(double) / sizeof(char);
217 double* wa3 = (double*)pws;
218 pws += n * sizeof(double) / sizeof(char);
219 double* wf = (double*)pws;
220 pws += m * sizeof(double) / sizeof(char);
221 int* Pivot = (int*)pws;
222 pws += n * sizeof(int) / sizeof(char);
223
224 /* Initialize diag. */
225 if (!C->scale_diag)
226 for (j = 0; j < n; j++)
227 diag[j] = 1;
228
229 /*** Evaluate function at starting point and calculate norm. ***/
230
231 if (C->verbosity) {
232 fprintf(msgfile, "lmmin start ");
233 lm_print_pars(nout, x, msgfile);
234 }
235 (*evaluate)(x, m, data, fvec, &(S->userbreak));
236 if (C->verbosity > 4)
237 for (i = 0; i < m; ++i)
238 fprintf(msgfile, " fvec[%4i] = %18.8g\n", i, fvec[i]);
239 S->nfev = 1;
240 if (S->userbreak)
241 goto terminate;
242 fnorm = lm_enorm(m, fvec);
243 if (C->verbosity)
244 fprintf(msgfile, " fnorm = %18.8g\n", fnorm);
245
246 if (!isfinite(fnorm)) {
247 S->outcome = 12; /* nan */
248 goto terminate;
249 } else if (fnorm <= LM_DWARF) {
250 S->outcome = 0; /* sum of squares almost zero, nothing to do */
251 goto terminate;
252 }
253
254 /*** The outer loop: compute gradient, then descend. ***/
255
256 for (int outer = 0;; ++outer) {
257
258 /** Calculate the Jacobian. **/
259 for (j = 0; j < n; j++) {
260 temp = x[j];
261 step = MAX(eps * eps, eps * fabs(temp));
262 x[j] += step; /* replace temporarily */
263 (*evaluate)(x, m, data, wf, &(S->userbreak));
264 ++(S->nfev);
265 if (S->userbreak)
266 goto terminate;
267 for (i = 0; i < m; i++)
268 fjac[j*m+i] = (wf[i] - fvec[i]) / step;
269 x[j] = temp; /* restore */
270 }
271 if (C->verbosity >= 10) {
272 /* print the entire matrix */
273 printf("\nlmmin Jacobian\n");
274 for (i = 0; i < m; i++) {
275 printf(" ");
276 for (j = 0; j < n; j++)
277 printf("%.5e ", fjac[j*m+i]);
278 printf("\n");
279 }
280 }
281
282 /** Compute the QR factorization of the Jacobian. **/
283
284 /* fjac is an m by n array. The upper n by n submatrix of fjac is made
285 * to contain an upper triangular matrix R with diagonal elements of
286 * nonincreasing magnitude such that
287 *
288 * P^T*(J^T*J)*P = R^T*R
289 *
290 * (NOTE: ^T stands for matrix transposition),
291 *
292 * where P is a permutation matrix and J is the final calculated
293 * Jacobian. Column j of P is column Pivot(j) of the identity matrix.
294 * The lower trapezoidal part of fjac contains information generated
295 * during the computation of R.
296 *
297 * Pivot is an integer array of length n. It defines a permutation
298 * matrix P such that jac*P = Q*R, where jac is the final calculated
299 * Jacobian, Q is orthogonal (not stored), and R is upper triangular
300 * with diagonal elements of nonincreasing magnitude. Column j of P
301 * is column Pivot(j) of the identity matrix.
302 */
303
304 lm_qrfac(m, n, fjac, Pivot, wa1, wa2, wa3);
305 /* return values are Pivot, wa1=rdiag, wa2=acnorm */
306
307 /** Form Q^T * fvec, and store first n components in qtf. **/
308 for (i = 0; i < m; i++)
309 wf[i] = fvec[i];
310
311 for (j = 0; j < n; j++) {
312 temp3 = fjac[j*m+j];
313 if (temp3 != 0) {
314 sum = 0;
315 for (i = j; i < m; i++)
316 sum += fjac[j*m+i] * wf[i];
317 temp = -sum / temp3;
318 for (i = j; i < m; i++)
319 wf[i] += fjac[j*m+i] * temp;
320 }
321 fjac[j*m+j] = wa1[j];
322 qtf[j] = wf[j];
323 }
324
325 /** Compute norm of scaled gradient and detect degeneracy. **/
326 gnorm = 0;
327 for (j = 0; j < n; j++) {
328 if (wa2[Pivot[j]] == 0)
329 continue;
330 sum = 0;
331 for (i = 0; i <= j; i++)
332 sum += fjac[j*m+i] * qtf[i];
333 gnorm = MAX(gnorm, fabs(sum / wa2[Pivot[j]] / fnorm));
334 }
335
336 if (gnorm <= C->gtol) {
337 S->outcome = 4;
338 goto terminate;
339 }
340
341 /** Initialize or update diag and delta. **/
342 if (!outer) { /* first iteration only */
343 if (C->scale_diag) {
344 /* diag := norms of the columns of the initial Jacobian */
345 for (j = 0; j < n; j++)
346 diag[j] = wa2[j] ? wa2[j] : 1;
347 /* xnorm := || D x || */
348 for (j = 0; j < n; j++)
349 wa3[j] = diag[j] * x[j];
350 xnorm = lm_enorm(n, wa3);
351 if (C->verbosity >= 2) {
352 fprintf(msgfile, "lmmin diag ");
353 lm_print_pars(nout, x, msgfile); // xnorm
354 fprintf(msgfile, " xnorm = %18.8g\n", xnorm);
355 }
356 /* Only now print the header for the loop table. */
357 if (C->verbosity >= 3) {
358 fprintf(msgfile, " o i lmpar prered"
359 " ratio dirder delta"
360 " pnorm fnorm");
361 for (i = 0; i < nout; ++i)
362 fprintf(msgfile, " p%i", i);
363 fprintf(msgfile, "\n");
364 }
365 } else {
366 xnorm = lm_enorm(n, x);
367 }
368 if (!isfinite(xnorm)) {
369 S->outcome = 12; /* nan */
370 goto terminate;
371 }
372 /* Initialize the step bound delta. */
373 if (xnorm)
374 delta = C->stepbound * xnorm;
375 else
376 delta = C->stepbound;
377 } else {
378 if (C->scale_diag) {
379 for (j = 0; j < n; j++)
380 diag[j] = MAX(diag[j], wa2[j]);
381 }
382 }
383
384 /** The inner loop. **/
385 int inner = 0;
386 do {
387
388 /** Determine the Levenberg-Marquardt parameter. **/
389 lm_lmpar(n, fjac, m, Pivot, diag, qtf, delta, &lmpar,
390 wa1, wa2, wf, wa3);
391 /* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
392
393 /* Predict scaled reduction. */
394 pnorm = lm_enorm(n, wa3);
395 if (!isfinite(pnorm)) {
396 S->outcome = 12; /* nan */
397 goto terminate;
398 }
399 temp2 = lmpar * SQR(pnorm / fnorm);
400 for (j = 0; j < n; j++) {
401 wa3[j] = 0;
402 for (i = 0; i <= j; i++)
403 wa3[i] -= fjac[j*m+i] * wa1[Pivot[j]];
404 }
405 temp1 = SQR(lm_enorm(n, wa3) / fnorm);
406 if (!isfinite(temp1)) {
407 S->outcome = 12; /* nan */
408 goto terminate;
409 }
410 prered = temp1 + 2*temp2;
411 dirder = -temp1 + temp2; /* scaled directional derivative */
412
413 /* At first call, adjust the initial step bound. */
414 if (!outer && pnorm < delta)
415 delta = pnorm;
416
417 /** Evaluate the function at x + p. **/
418 for (j = 0; j < n; j++)
419 wa2[j] = x[j] - wa1[j];
420 (*evaluate)(wa2, m, data, wf, &(S->userbreak));
421 ++(S->nfev);
422 if (S->userbreak)
423 goto terminate;
424 fnorm1 = lm_enorm(m, wf);
425 if (!isfinite(fnorm1)) {
426 S->outcome = 12; /* nan */
427 goto terminate;
428 }
429
430 /** Evaluate the scaled reduction. **/
431
432 /* Actual scaled reduction. */
433 actred = 1 - SQR(fnorm1 / fnorm);
434
435 /* Ratio of actual to predicted reduction. */
436 ratio = prered ? actred / prered : 0;
437
438 if (C->verbosity == 2) {
439 fprintf(msgfile, "lmmin (%i:%i) ", outer, inner);
440 lm_print_pars(nout, wa2, msgfile); // fnorm1,
441 } else if (C->verbosity >= 3) {
442 printf("%3i %2i %9.2g %9.2g %14.6g"
443 " %9.2g %10.3e %10.3e %21.15e",
444 outer, inner, lmpar, prered, ratio,
445 dirder, delta, pnorm, fnorm1);
446 for (i = 0; i < nout; ++i)
447 fprintf(msgfile, " %16.9g", wa2[i]);
448 fprintf(msgfile, "\n");
449 }
450
451 /* Update the step bound. */
452 if (ratio <= 0.25) {
453 if (actred >= 0)
454 temp = 0.5;
455 else if (actred > -99) /* -99 = 1-1/0.1^2 */
456 temp = MAX(dirder / (2*dirder + actred), 0.1);
457 else
458 temp = 0.1;
459 delta = temp * MIN(delta, pnorm / 0.1);
460 lmpar /= temp;
461 } else if (ratio >= 0.75) {
462 delta = 2 * pnorm;
463 lmpar *= 0.5;
464 } else if (!lmpar) {
465 delta = 2 * pnorm;
466 }
467
468 /** On success, update solution, and test for convergence. **/
469
470 inner_success = ratio >= 1e-4;
471 if (inner_success) {
472
473 /* Update x, fvec, and their norms. */
474 if (C->scale_diag) {
475 for (j = 0; j < n; j++) {
476 x[j] = wa2[j];
477 wa2[j] = diag[j] * x[j];
478 }
479 } else {
480 for (j = 0; j < n; j++)
481 x[j] = wa2[j];
482 }
483 for (i = 0; i < m; i++)
484 fvec[i] = wf[i];
485 xnorm = lm_enorm(n, wa2);
486 if (!isfinite(xnorm)) {
487 S->outcome = 12; /* nan */
488 goto terminate;
489 }
490 fnorm = fnorm1;
491 }
492
493 /* Convergence tests. */
494 S->outcome = 0;
495 if (fnorm <= LM_DWARF)
496 goto terminate; /* success: sum of squares almost zero */
497 /* Test two criteria (both may be fulfilled). */
498 if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2)
499 S->outcome = 1; /* success: x almost stable */
500 if (delta <= C->xtol * xnorm)
501 S->outcome += 2; /* success: sum of squares almost stable */
502 if (S->outcome != 0) {
503 goto terminate;
504 }
505
506 /** Tests for termination and stringent tolerances. **/
507 if (S->nfev >= maxfev) {
508 S->outcome = 5;
509 goto terminate;
510 }
511 if (fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP &&
512 ratio <= 2) {
513 S->outcome = 6;
514 goto terminate;
515 }
516 if (delta <= LM_MACHEP * xnorm) {
517 S->outcome = 7;
518 goto terminate;
519 }
520 if (gnorm <= LM_MACHEP) {
521 S->outcome = 8;
522 goto terminate;
523 }
524
525 /** End of the inner loop. Repeat if iteration unsuccessful. **/
526 ++inner;
527 } while (!inner_success);
528
529 }; /*** End of the outer loop. ***/
530
531 terminate:
532 S->fnorm = lm_enorm(m, fvec);
533 if (C->verbosity >= 2)
534 printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n", S->outcome,
535 xnorm, C->ftol, C->xtol);
536 if (C->verbosity & 1) {
537 fprintf(msgfile, "lmmin final ");
538 lm_print_pars(nout, x, msgfile); // S->fnorm,
539 fprintf(msgfile, " fnorm = %18.8g\n", S->fnorm);
540 }
541 if (S->userbreak) /* user-requested break */
542 S->outcome = 11;
543
544 /*** Deallocate the workspace. ***/
545 free(ws);
546
547 } /*** lmmin. ***/
548
549 /******************************************************************************/
550 /* lm_lmpar (determine Levenberg-Marquardt parameter) */
551 /******************************************************************************/
552
lm_lmpar(const int n,double * r,const int ldr,const int * Pivot,const double * diag,const double * qtb,const double delta,double * par,double * x,double * Sdiag,double * aux,double * xdi)553 void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
554 const double* diag, const double* qtb, const double delta,
555 double* par, double* x, double* Sdiag, double* aux, double* xdi)
556 /* Given an m by n matrix A, an n by n nonsingular diagonal matrix D,
557 * an m-vector b, and a positive number delta, the problem is to
558 * determine a parameter value par such that if x solves the system
559 *
560 * A*x = b and sqrt(par)*D*x = 0
561 *
562 * in the least squares sense, and dxnorm is the Euclidean norm of D*x,
563 * then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and
564 * abs(dxnorm-delta) < 0.1*delta.
565 *
566 * Using lm_qrsolv, this subroutine completes the solution of the
567 * problem if it is provided with the necessary information from the
568 * QR factorization, with column pivoting, of A. That is, if A*P = Q*R,
569 * where P is a permutation matrix, Q has orthogonal columns, and R is
570 * an upper triangular matrix with diagonal elements of nonincreasing
571 * magnitude, then lmpar expects the full upper triangle of R, the
572 * permutation matrix P, and the first n components of Q^T*b. On output
573 * lmpar also provides an upper triangular matrix S such that
574 *
575 * P^T*(A^T*A + par*D*D)*P = S^T*S.
576 *
577 * S is employed within lmpar and may be of separate interest.
578 *
579 * Only a few iterations are generally needed for convergence of the
580 * algorithm. If, however, the limit of 10 iterations is reached, then
581 * the output par will contain the best value obtained so far.
582 *
583 * Parameters:
584 *
585 * n is a positive integer INPUT variable set to the order of r.
586 *
587 * r is an n by n array. On INPUT the full upper triangle must contain
588 * the full upper triangle of the matrix R. On OUTPUT the full upper
589 * triangle is unaltered, and the strict lower triangle contains the
590 * strict upper triangle (transposed) of the upper triangular matrix S.
591 *
592 * ldr is a positive integer INPUT variable not less than n which
593 * specifies the leading dimension of the array R.
594 *
595 * Pivot is an integer INPUT array of length n which defines the
596 * permutation matrix P such that A*P = Q*R. Column j of P is column
597 * Pivot(j) of the identity matrix.
598 *
599 * diag is an INPUT array of length n which must contain the diagonal
600 * elements of the matrix D.
601 *
602 * qtb is an INPUT array of length n which must contain the first
603 * n elements of the vector Q^T*b.
604 *
605 * delta is a positive INPUT variable which specifies an upper bound
606 * on the Euclidean norm of D*x.
607 *
608 * par is a nonnegative variable. On INPUT par contains an initial
609 * estimate of the Levenberg-Marquardt parameter. On OUTPUT par
610 * contains the final estimate.
611 *
612 * x is an OUTPUT array of length n which contains the least-squares
613 * solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par.
614 *
615 * Sdiag is an array of length n needed as workspace; on OUTPUT it
616 * contains the diagonal elements of the upper triangular matrix S.
617 *
618 * aux is a multi-purpose work array of length n.
619 *
620 * xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
621 *
622 */
623 {
624 int i, iter, j, nsing;
625 double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
626 double sum, temp;
627 static double p1 = 0.1;
628
629 /*** Compute and store in x the Gauss-Newton direction. If the Jacobian
630 is rank-deficient, obtain a least-squares solution. ***/
631
632 nsing = n;
633 for (j = 0; j < n; j++) {
634 aux[j] = qtb[j];
635 if (r[j*ldr+j] == 0 && nsing == n)
636 nsing = j;
637 if (nsing < n)
638 aux[j] = 0;
639 }
640 for (j = nsing-1; j >= 0; j--) {
641 aux[j] = aux[j] / r[j+ldr*j];
642 temp = aux[j];
643 for (i = 0; i < j; i++)
644 aux[i] -= r[j*ldr+i] * temp;
645 }
646
647 for (j = 0; j < n; j++)
648 x[Pivot[j]] = aux[j];
649
650 /*** Initialize the iteration counter, evaluate the function at the origin,
651 and test for acceptance of the Gauss-Newton direction. ***/
652
653 for (j = 0; j < n; j++)
654 xdi[j] = diag[j] * x[j];
655 dxnorm = lm_enorm(n, xdi);
656 fp = dxnorm - delta;
657 if (fp <= p1 * delta) {
658 #ifdef LMFIT_DEBUG_MESSAGES
659 printf("debug lmpar nsing=%d, n=%d, terminate[fp<=p1*del]\n", nsing, n);
660 #endif
661 *par = 0;
662 return;
663 }
664
665 /*** If the Jacobian is not rank deficient, the Newton step provides a
666 lower bound, parl, for the zero of the function. Otherwise set this
667 bound to zero. ***/
668
669 parl = 0;
670 if (nsing >= n) {
671 for (j = 0; j < n; j++)
672 aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
673
674 for (j = 0; j < n; j++) {
675 sum = 0;
676 for (i = 0; i < j; i++)
677 sum += r[j*ldr+i] * aux[i];
678 aux[j] = (aux[j] - sum) / r[j+ldr*j];
679 }
680 temp = lm_enorm(n, aux);
681 parl = fp / delta / temp / temp;
682 }
683
684 /*** Calculate an upper bound, paru, for the zero of the function. ***/
685
686 for (j = 0; j < n; j++) {
687 sum = 0;
688 for (i = 0; i <= j; i++)
689 sum += r[j*ldr+i] * qtb[i];
690 aux[j] = sum / diag[Pivot[j]];
691 }
692 gnorm = lm_enorm(n, aux);
693 paru = gnorm / delta;
694 if (paru == 0)
695 paru = LM_DWARF / MIN(delta, p1);
696
697 /*** If the input par lies outside of the interval (parl,paru),
698 set par to the closer endpoint. ***/
699
700 *par = MAX(*par, parl);
701 *par = MIN(*par, paru);
702 if (*par == 0)
703 *par = gnorm / dxnorm;
704
705 /*** Iterate. ***/
706
707 for (iter = 0;; iter++) {
708
709 /** Evaluate the function at the current value of par. **/
710 if (*par == 0)
711 *par = MAX(LM_DWARF, 0.001 * paru);
712 temp = sqrt(*par);
713 for (j = 0; j < n; j++)
714 aux[j] = temp * diag[j];
715
716 lm_qrsolv(n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi);
717 /* return values are r, x, Sdiag */
718
719 for (j = 0; j < n; j++)
720 xdi[j] = diag[j] * x[j]; /* used as output */
721 dxnorm = lm_enorm(n, xdi);
722 fp_old = fp;
723 fp = dxnorm - delta;
724
725 /** If the function is small enough, accept the current value
726 of par. Also test for the exceptional cases where parl
727 is zero or the number of iterations has reached 10. **/
728 if (fabs(fp) <= p1 * delta ||
729 (parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10) {
730 #ifdef LMFIT_DEBUG_MESSAGES
731 printf("debug lmpar nsing=%d, iter=%d, "
732 "par=%.4e [%.4e %.4e], delta=%.4e, fp=%.4e\n",
733 nsing, iter, *par, parl, paru, delta, fp);
734 #endif
735 break; /* the only exit from the iteration. */
736 }
737
738 /** Compute the Newton correction. **/
739 for (j = 0; j < n; j++)
740 aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
741
742 for (j = 0; j < n; j++) {
743 aux[j] = aux[j] / Sdiag[j];
744 for (i = j+1; i < n; i++)
745 aux[i] -= r[j*ldr+i] * aux[j];
746 }
747 temp = lm_enorm(n, aux);
748 parc = fp / delta / temp / temp;
749
750 /** Depending on the sign of the function, update parl or paru. **/
751 if (fp > 0)
752 parl = MAX(parl, *par);
753 else /* fp < 0 [the case fp==0 is precluded by the break condition] */
754 paru = MIN(paru, *par);
755
756 /** Compute an improved estimate for par. **/
757 *par = MAX(parl, *par + parc);
758 }
759
760 } /*** lm_lmpar. ***/
761
762 /******************************************************************************/
763 /* lm_qrfac (QR factorization, from lapack) */
764 /******************************************************************************/
765
lm_qrfac(const int m,const int n,double * A,int * Pivot,double * Rdiag,double * Acnorm,double * W)766 void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
767 double* Acnorm, double* W)
768 /*
769 * This subroutine uses Householder transformations with column pivoting
770 * to compute a QR factorization of the m by n matrix A. That is, qrfac
771 * determines an orthogonal matrix Q, a permutation matrix P, and an
772 * upper trapezoidal matrix R with diagonal elements of nonincreasing
773 * magnitude, such that A*P = Q*R. The Householder transformation for
774 * column k, k = 1,2,...,n, is of the form
775 *
776 * I - 2*w*wT/|w|^2
777 *
778 * where w has zeroes in the first k-1 positions.
779 *
780 * Parameters:
781 *
782 * m is an INPUT parameter set to the number of rows of A.
783 *
784 * n is an INPUT parameter set to the number of columns of A.
785 *
786 * A is an m by n array. On INPUT, A contains the matrix for which the
787 * QR factorization is to be computed. On OUTPUT the strict upper
788 * trapezoidal part of A contains the strict upper trapezoidal part
789 * of R, and the lower trapezoidal part of A contains a factored form
790 * of Q (the non-trivial elements of the vectors w described above).
791 *
792 * Pivot is an integer OUTPUT array of length n that describes the
793 * permutation matrix P. Column j of P is column Pivot(j) of the
794 * identity matrix.
795 *
796 * Rdiag is an OUTPUT array of length n which contains the diagonal
797 * elements of R.
798 *
799 * Acnorm is an OUTPUT array of length n which contains the norms of
800 * the corresponding columns of the input matrix A. If this information
801 * is not needed, then Acnorm can share storage with Rdiag.
802 *
803 * W is a work array of length n.
804 *
805 */
806 {
807 int i, j, k, kmax;
808 double ajnorm, sum, temp;
809
810 #ifdef LMFIT_DEBUG_MESSAGES
811 printf("debug qrfac\n");
812 #endif
813
814 /** Compute initial column norms;
815 initialize Pivot with identity permutation. ***/
816 for (j = 0; j < n; j++) {
817 W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]);
818 Pivot[j] = j;
819 }
820
821 /** Loop over columns of A. **/
822 assert(n <= m);
823 for (j = 0; j < n; j++) {
824
825 /** Bring the column of largest norm into the pivot position. **/
826 kmax = j;
827 for (k = j+1; k < n; k++)
828 if (Rdiag[k] > Rdiag[kmax])
829 kmax = k;
830
831 if (kmax != j) {
832 /* Swap columns j and kmax. */
833 k = Pivot[j];
834 Pivot[j] = Pivot[kmax];
835 Pivot[kmax] = k;
836 for (i = 0; i < m; i++) {
837 temp = A[j*m+i];
838 A[j*m+i] = A[kmax*m+i];
839 A[kmax*m+i] = temp;
840 }
841 /* Half-swap: Rdiag[j], W[j] won't be needed any further. */
842 Rdiag[kmax] = Rdiag[j];
843 W[kmax] = W[j];
844 }
845
846 /** Compute the Householder reflection vector w_j to reduce the
847 j-th column of A to a multiple of the j-th unit vector. **/
848 ajnorm = lm_enorm(m-j, &A[j*m+j]);
849 if (ajnorm == 0) {
850 Rdiag[j] = 0;
851 continue;
852 }
853
854 /* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|,
855 where the sign +- is chosen to avoid cancellation in w_jj. */
856 if (A[j*m+j] < 0)
857 ajnorm = -ajnorm;
858 for (i = j; i < m; i++)
859 A[j*m+i] /= ajnorm;
860 A[j*m+j] += 1;
861
862 /** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2
863 to the remaining columns, and update the norms. **/
864 for (k = j+1; k < n; k++) {
865 /* Compute scalar product w_j * a_j. */
866 sum = 0;
867 for (i = j; i < m; i++)
868 sum += A[j*m+i] * A[k*m+i];
869
870 /* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */
871 temp = sum / A[j*m+j];
872
873 /* Carry out transform U_w_j * a_k. */
874 for (i = j; i < m; i++)
875 A[k*m+i] -= temp * A[j*m+i];
876
877 /* No idea what happens here. */
878 if (Rdiag[k] != 0) {
879 temp = A[m*k+j] / Rdiag[k];
880 if (fabs(temp) < 1) {
881 Rdiag[k] *= sqrt(1 - SQR(temp));
882 temp = Rdiag[k] / W[k];
883 } else
884 temp = 0;
885 if (temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP) {
886 Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]);
887 W[k] = Rdiag[k];
888 }
889 }
890 }
891
892 Rdiag[j] = -ajnorm;
893 }
894 } /*** lm_qrfac. ***/
895
896 /******************************************************************************/
897 /* lm_qrsolv (linear least-squares) */
898 /******************************************************************************/
899
lm_qrsolv(const int n,double * r,const int ldr,const int * Pivot,const double * diag,const double * qtb,double * x,double * Sdiag,double * W)900 void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
901 const double* diag, const double* qtb, double* x,
902 double* Sdiag, double* W)
903 /*
904 * Given an m by n matrix A, an n by n diagonal matrix D, and an
905 * m-vector b, the problem is to determine an x which solves the
906 * system
907 *
908 * A*x = b and D*x = 0
909 *
910 * in the least squares sense.
911 *
912 * This subroutine completes the solution of the problem if it is
913 * provided with the necessary information from the QR factorization,
914 * with column pivoting, of A. That is, if A*P = Q*R, where P is a
915 * permutation matrix, Q has orthogonal columns, and R is an upper
916 * triangular matrix with diagonal elements of nonincreasing magnitude,
917 * then qrsolv expects the full upper triangle of R, the permutation
918 * matrix P, and the first n components of Q^T*b. The system
919 * A*x = b, D*x = 0, is then equivalent to
920 *
921 * R*z = Q^T*b, P^T*D*P*z = 0,
922 *
923 * where x = P*z. If this system does not have full rank, then a least
924 * squares solution is obtained. On output qrsolv also provides an upper
925 * triangular matrix S such that
926 *
927 * P^T*(A^T*A + D*D)*P = S^T*S.
928 *
929 * S is computed within qrsolv and may be of separate interest.
930 *
931 * Parameters:
932 *
933 * n is a positive integer INPUT variable set to the order of R.
934 *
935 * r is an n by n array. On INPUT the full upper triangle must contain
936 * the full upper triangle of the matrix R. On OUTPUT the full upper
937 * triangle is unaltered, and the strict lower triangle contains the
938 * strict upper triangle (transposed) of the upper triangular matrix S.
939 *
940 * ldr is a positive integer INPUT variable not less than n which
941 * specifies the leading dimension of the array R.
942 *
943 * Pivot is an integer INPUT array of length n which defines the
944 * permutation matrix P such that A*P = Q*R. Column j of P is column
945 * Pivot(j) of the identity matrix.
946 *
947 * diag is an INPUT array of length n which must contain the diagonal
948 * elements of the matrix D.
949 *
950 * qtb is an INPUT array of length n which must contain the first
951 * n elements of the vector Q^T*b.
952 *
953 * x is an OUTPUT array of length n which contains the least-squares
954 * solution of the system A*x = b, D*x = 0.
955 *
956 * Sdiag is an OUTPUT array of length n which contains the diagonal
957 * elements of the upper triangular matrix S.
958 *
959 * W is a work array of length n.
960 *
961 */
962 {
963 int i, kk, j, k, nsing;
964 double qtbpj, sum, temp;
965 double _sin, _cos, _tan, _cot; /* local variables, not functions */
966
967 /*** Copy R and Q^T*b to preserve input and initialize S.
968 In particular, save the diagonal elements of R in x. ***/
969
970 for (j = 0; j < n; j++) {
971 for (i = j; i < n; i++)
972 r[j*ldr+i] = r[i*ldr+j];
973 x[j] = r[j*ldr+j];
974 W[j] = qtb[j];
975 }
976
977 /*** Eliminate the diagonal matrix D using a Givens rotation. ***/
978
979 for (j = 0; j < n; j++) {
980
981 /*** Prepare the row of D to be eliminated, locating the diagonal
982 element using P from the QR factorization. ***/
983
984 if (diag[Pivot[j]] != 0) {
985 for (k = j; k < n; k++)
986 Sdiag[k] = 0;
987 Sdiag[j] = diag[Pivot[j]];
988
989 /*** The transformations to eliminate the row of D modify only
990 a single element of Q^T*b beyond the first n, which is
991 initially 0. ***/
992
993 qtbpj = 0;
994 for (k = j; k < n; k++) {
995
996 /** Determine a Givens rotation which eliminates the
997 appropriate element in the current row of D. **/
998 if (Sdiag[k] == 0)
999 continue;
1000 kk = k + ldr * k;
1001 if (fabs(r[kk]) < fabs(Sdiag[k])) {
1002 _cot = r[kk] / Sdiag[k];
1003 _sin = 1 / hypot(1, _cot);
1004 _cos = _sin * _cot;
1005 } else {
1006 _tan = Sdiag[k] / r[kk];
1007 _cos = 1 / hypot(1, _tan);
1008 _sin = _cos * _tan;
1009 }
1010
1011 /** Compute the modified diagonal element of R and
1012 the modified element of (Q^T*b,0). **/
1013 r[kk] = _cos * r[kk] + _sin * Sdiag[k];
1014 temp = _cos * W[k] + _sin * qtbpj;
1015 qtbpj = -_sin * W[k] + _cos * qtbpj;
1016 W[k] = temp;
1017
1018 /** Accumulate the tranformation in the row of S. **/
1019 for (i = k+1; i < n; i++) {
1020 temp = _cos * r[k*ldr+i] + _sin * Sdiag[i];
1021 Sdiag[i] = -_sin * r[k*ldr+i] + _cos * Sdiag[i];
1022 r[k*ldr+i] = temp;
1023 }
1024 }
1025 }
1026
1027 /** Store the diagonal element of S and restore
1028 the corresponding diagonal element of R. **/
1029 Sdiag[j] = r[j*ldr+j];
1030 r[j*ldr+j] = x[j];
1031 }
1032
1033 /*** Solve the triangular system for z. If the system is singular, then
1034 obtain a least-squares solution. ***/
1035
1036 nsing = n;
1037 for (j = 0; j < n; j++) {
1038 if (Sdiag[j] == 0 && nsing == n)
1039 nsing = j;
1040 if (nsing < n)
1041 W[j] = 0;
1042 }
1043
1044 for (j = nsing-1; j >= 0; j--) {
1045 sum = 0;
1046 for (i = j+1; i < nsing; i++)
1047 sum += r[j*ldr+i] * W[i];
1048 W[j] = (W[j] - sum) / Sdiag[j];
1049 }
1050
1051 /*** Permute the components of z back to components of x. ***/
1052
1053 for (j = 0; j < n; j++)
1054 x[Pivot[j]] = W[j];
1055
1056 } /*** lm_qrsolv. ***/
1057
1058 /******************************************************************************/
1059 /* lm_enorm (Euclidean norm) */
1060 /******************************************************************************/
1061
lm_enorm(int n,const double * x)1062 double lm_enorm(int n, const double* x)
1063 /* This function calculates the Euclidean norm of an n-vector x.
1064 *
1065 * The Euclidean norm is computed by accumulating the sum of squares
1066 * in three different sums. The sums of squares for the small and large
1067 * components are scaled so that no overflows occur. Non-destructive
1068 * underflows are permitted. Underflows and overflows do not occur in
1069 * the computation of the unscaled sum of squares for the intermediate
1070 * components. The definitions of small, intermediate and large components
1071 * depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
1072 * restrictions on these constants are that LM_SQRT_DWARF**2 not underflow
1073 * and LM_SQRT_GIANT**2 not overflow.
1074 *
1075 * Parameters:
1076 *
1077 * n is a positive integer INPUT variable.
1078 *
1079 * x is an INPUT array of length n.
1080 */
1081 {
1082 int i;
1083 double agiant, s1, s2, s3, xabs, x1max, x3max;
1084
1085 s1 = 0;
1086 s2 = 0;
1087 s3 = 0;
1088 x1max = 0;
1089 x3max = 0;
1090 agiant = LM_SQRT_GIANT / n;
1091
1092 /** Sum squares. **/
1093 for (i = 0; i < n; i++) {
1094 xabs = fabs(x[i]);
1095 if (xabs > LM_SQRT_DWARF) {
1096 if (xabs < agiant) {
1097 s2 += SQR(xabs);
1098 } else if (xabs > x1max) {
1099 s1 = 1 + s1 * SQR(x1max / xabs);
1100 x1max = xabs;
1101 } else {
1102 s1 += SQR(xabs / x1max);
1103 }
1104 } else if (xabs > x3max) {
1105 s3 = 1 + s3 * SQR(x3max / xabs);
1106 x3max = xabs;
1107 } else if (xabs != 0) {
1108 s3 += SQR(xabs / x3max);
1109 }
1110 }
1111
1112 /** Calculate the norm. **/
1113 if (s1 != 0)
1114 return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1115 else if (s2 != 0)
1116 if (s2 >= x3max)
1117 return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1118 else
1119 return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1120 else
1121 return x3max * sqrt(s3);
1122
1123 } /*** lm_enorm. ***/
1124