1 /*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
22
23 /*
24
25
26 THE ALGORITHM
27 -------------
28
29 solve A*x = b+w, with x and w subject to certain LCP conditions.
30 each x(i),w(i) must lie on one of the three line segments in the following
31 diagram. each line segment corresponds to one index set :
32
33 w(i)
34 /|\ | :
35 | | :
36 | |i in N :
37 w>0 | |state[i]=0 :
38 | | :
39 | | : i in C
40 w=0 + +-----------------------+
41 | : |
42 | : |
43 w<0 | : |i in N
44 | : |state[i]=1
45 | : |
46 | : |
47 +-------|-----------|-----------|----------> x(i)
48 lo 0 hi
49
50 the Dantzig algorithm proceeds as follows:
51 for i=1:n
52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53 negative towards the line. as this is done, the other (x(j),w(j))
54 for j<i are constrained to be on the line. if any (x,w) reaches the
55 end of a line segment then it is switched between index sets.
56 * i is added to the appropriate index set depending on what line segment
57 it hits.
58
59 we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60 simpler, because the starting point for x(i),w(i) is always on the dotted
61 line x=0 and x will only ever increase in one direction, so it can only hit
62 two out of the three line segments.
63
64
65 NOTES
66 -----
67
68 this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69 the implementation is split into an LCP problem object (btLCP) and an LCP
70 driver function. most optimization occurs in the btLCP object.
71
72 a naive implementation of the algorithm requires either a lot of data motion
73 or a lot of permutation-array lookup, because we are constantly re-ordering
74 rows and columns. to avoid this and make a more optimized algorithm, a
75 non-trivial data structure is used to represent the matrix A (this is
76 implemented in the fast version of the btLCP object).
77
78 during execution of this algorithm, some indexes in A are clamped (set C),
79 some are non-clamped (set N), and some are "don't care" (where x=0).
80 A,x,b,w (and other problem vectors) are permuted such that the clamped
81 indexes are first, the unclamped indexes are next, and the don't-care
82 indexes are last. this permutation is recorded in the array `p'.
83 initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84 the corresponding elements of p are swapped.
85
86 because the C and N elements are grouped together in the rows of A, we can do
87 lots of work with a fast dot product function. if A,x,etc were not permuted
88 and we only had a permutation array, then those dot products would be much
89 slower as we would have a permutation array lookup in some inner loops.
90
91 A is accessed through an array of row pointers, so that element (i,j) of the
92 permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93 we still have to actually move the data.
94
95 during execution of this algorithm we maintain an L*D*L' factorization of
96 the clamped submatrix of A (call it `AC') which is the top left nC*nC
97 submatrix of A. there are two ways we could arrange the rows/columns in AC.
98
99 (1) AC is always permuted such that L*D*L' = AC. this causes a problem
100 when a row/column is removed from C, because then all the rows/columns of A
101 between the deleted index and the end of C need to be rotated downward.
102 this results in a lot of data motion and slows things down.
103 (2) L*D*L' is actually a factorization of a *permutation* of AC (which is
104 itself a permutation of the underlying A). this is what we do - the
105 permutation is recorded in the vector C. call this permutation A[C,C].
106 when a row/column is removed from C, all we have to do is swap two
107 rows/columns and manipulate C.
108
109 */
110
111
112 #include "btDantzigLCP.h"
113
114 #include <string.h>//memcpy
115
116 bool s_error = false;
117
118 //***************************************************************************
119 // code generation parameters
120
121
122 #define btLCP_FAST // use fast btLCP object
123
124 // option 1 : matrix row pointers (less data copying)
125 #define BTROWPTRS
126 #define BTATYPE btScalar **
127 #define BTAROW(i) (m_A[i])
128
129 // option 2 : no matrix row pointers (slightly faster inner loops)
130 //#define NOROWPTRS
131 //#define BTATYPE btScalar *
132 //#define BTAROW(i) (m_A+(i)*m_nskip)
133
134 #define BTNUB_OPTIMIZATIONS
135
136
137
138 /* solve L*X=B, with B containing 1 right hand sides.
139 * L is an n*n lower triangular matrix with ones on the diagonal.
140 * L is stored by rows and its leading dimension is lskip.
141 * B is an n*1 matrix that contains the right hand sides.
142 * B is stored by columns and its leading dimension is also lskip.
143 * B is overwritten with X.
144 * this processes blocks of 2*2.
145 * if this is in the factorizer source file, n must be a multiple of 2.
146 */
147
btSolveL1_1(const btScalar * L,btScalar * B,int n,int lskip1)148 static void btSolveL1_1 (const btScalar *L, btScalar *B, int n, int lskip1)
149 {
150 /* declare variables - Z matrix, p and q vectors, etc */
151 btScalar Z11,m11,Z21,m21,p1,q1,p2,*ex;
152 const btScalar *ell;
153 int i,j;
154 /* compute all 2 x 1 blocks of X */
155 for (i=0; i < n; i+=2) {
156 /* compute all 2 x 1 block of X, from rows i..i+2-1 */
157 /* set the Z matrix to 0 */
158 Z11=0;
159 Z21=0;
160 ell = L + i*lskip1;
161 ex = B;
162 /* the inner loop that computes outer products and adds them to Z */
163 for (j=i-2; j >= 0; j -= 2) {
164 /* compute outer product and add it to the Z matrix */
165 p1=ell[0];
166 q1=ex[0];
167 m11 = p1 * q1;
168 p2=ell[lskip1];
169 m21 = p2 * q1;
170 Z11 += m11;
171 Z21 += m21;
172 /* compute outer product and add it to the Z matrix */
173 p1=ell[1];
174 q1=ex[1];
175 m11 = p1 * q1;
176 p2=ell[1+lskip1];
177 m21 = p2 * q1;
178 /* advance pointers */
179 ell += 2;
180 ex += 2;
181 Z11 += m11;
182 Z21 += m21;
183 /* end of inner loop */
184 }
185 /* compute left-over iterations */
186 j += 2;
187 for (; j > 0; j--) {
188 /* compute outer product and add it to the Z matrix */
189 p1=ell[0];
190 q1=ex[0];
191 m11 = p1 * q1;
192 p2=ell[lskip1];
193 m21 = p2 * q1;
194 /* advance pointers */
195 ell += 1;
196 ex += 1;
197 Z11 += m11;
198 Z21 += m21;
199 }
200 /* finish computing the X(i) block */
201 Z11 = ex[0] - Z11;
202 ex[0] = Z11;
203 p1 = ell[lskip1];
204 Z21 = ex[1] - Z21 - p1*Z11;
205 ex[1] = Z21;
206 /* end of outer loop */
207 }
208 }
209
210 /* solve L*X=B, with B containing 2 right hand sides.
211 * L is an n*n lower triangular matrix with ones on the diagonal.
212 * L is stored by rows and its leading dimension is lskip.
213 * B is an n*2 matrix that contains the right hand sides.
214 * B is stored by columns and its leading dimension is also lskip.
215 * B is overwritten with X.
216 * this processes blocks of 2*2.
217 * if this is in the factorizer source file, n must be a multiple of 2.
218 */
219
btSolveL1_2(const btScalar * L,btScalar * B,int n,int lskip1)220 static void btSolveL1_2 (const btScalar *L, btScalar *B, int n, int lskip1)
221 {
222 /* declare variables - Z matrix, p and q vectors, etc */
223 btScalar Z11,m11,Z12,m12,Z21,m21,Z22,m22,p1,q1,p2,q2,*ex;
224 const btScalar *ell;
225 int i,j;
226 /* compute all 2 x 2 blocks of X */
227 for (i=0; i < n; i+=2) {
228 /* compute all 2 x 2 block of X, from rows i..i+2-1 */
229 /* set the Z matrix to 0 */
230 Z11=0;
231 Z12=0;
232 Z21=0;
233 Z22=0;
234 ell = L + i*lskip1;
235 ex = B;
236 /* the inner loop that computes outer products and adds them to Z */
237 for (j=i-2; j >= 0; j -= 2) {
238 /* compute outer product and add it to the Z matrix */
239 p1=ell[0];
240 q1=ex[0];
241 m11 = p1 * q1;
242 q2=ex[lskip1];
243 m12 = p1 * q2;
244 p2=ell[lskip1];
245 m21 = p2 * q1;
246 m22 = p2 * q2;
247 Z11 += m11;
248 Z12 += m12;
249 Z21 += m21;
250 Z22 += m22;
251 /* compute outer product and add it to the Z matrix */
252 p1=ell[1];
253 q1=ex[1];
254 m11 = p1 * q1;
255 q2=ex[1+lskip1];
256 m12 = p1 * q2;
257 p2=ell[1+lskip1];
258 m21 = p2 * q1;
259 m22 = p2 * q2;
260 /* advance pointers */
261 ell += 2;
262 ex += 2;
263 Z11 += m11;
264 Z12 += m12;
265 Z21 += m21;
266 Z22 += m22;
267 /* end of inner loop */
268 }
269 /* compute left-over iterations */
270 j += 2;
271 for (; j > 0; j--) {
272 /* compute outer product and add it to the Z matrix */
273 p1=ell[0];
274 q1=ex[0];
275 m11 = p1 * q1;
276 q2=ex[lskip1];
277 m12 = p1 * q2;
278 p2=ell[lskip1];
279 m21 = p2 * q1;
280 m22 = p2 * q2;
281 /* advance pointers */
282 ell += 1;
283 ex += 1;
284 Z11 += m11;
285 Z12 += m12;
286 Z21 += m21;
287 Z22 += m22;
288 }
289 /* finish computing the X(i) block */
290 Z11 = ex[0] - Z11;
291 ex[0] = Z11;
292 Z12 = ex[lskip1] - Z12;
293 ex[lskip1] = Z12;
294 p1 = ell[lskip1];
295 Z21 = ex[1] - Z21 - p1*Z11;
296 ex[1] = Z21;
297 Z22 = ex[1+lskip1] - Z22 - p1*Z12;
298 ex[1+lskip1] = Z22;
299 /* end of outer loop */
300 }
301 }
302
303
btFactorLDLT(btScalar * A,btScalar * d,int n,int nskip1)304 void btFactorLDLT (btScalar *A, btScalar *d, int n, int nskip1)
305 {
306 int i,j;
307 btScalar sum,*ell,*dee,dd,p1,p2,q1,q2,Z11,m11,Z21,m21,Z22,m22;
308 if (n < 1) return;
309
310 for (i=0; i<=n-2; i += 2) {
311 /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */
312 btSolveL1_2 (A,A+i*nskip1,i,nskip1);
313 /* scale the elements in a 2 x i block at A(i,0), and also */
314 /* compute Z = the outer product matrix that we'll need. */
315 Z11 = 0;
316 Z21 = 0;
317 Z22 = 0;
318 ell = A+i*nskip1;
319 dee = d;
320 for (j=i-6; j >= 0; j -= 6) {
321 p1 = ell[0];
322 p2 = ell[nskip1];
323 dd = dee[0];
324 q1 = p1*dd;
325 q2 = p2*dd;
326 ell[0] = q1;
327 ell[nskip1] = q2;
328 m11 = p1*q1;
329 m21 = p2*q1;
330 m22 = p2*q2;
331 Z11 += m11;
332 Z21 += m21;
333 Z22 += m22;
334 p1 = ell[1];
335 p2 = ell[1+nskip1];
336 dd = dee[1];
337 q1 = p1*dd;
338 q2 = p2*dd;
339 ell[1] = q1;
340 ell[1+nskip1] = q2;
341 m11 = p1*q1;
342 m21 = p2*q1;
343 m22 = p2*q2;
344 Z11 += m11;
345 Z21 += m21;
346 Z22 += m22;
347 p1 = ell[2];
348 p2 = ell[2+nskip1];
349 dd = dee[2];
350 q1 = p1*dd;
351 q2 = p2*dd;
352 ell[2] = q1;
353 ell[2+nskip1] = q2;
354 m11 = p1*q1;
355 m21 = p2*q1;
356 m22 = p2*q2;
357 Z11 += m11;
358 Z21 += m21;
359 Z22 += m22;
360 p1 = ell[3];
361 p2 = ell[3+nskip1];
362 dd = dee[3];
363 q1 = p1*dd;
364 q2 = p2*dd;
365 ell[3] = q1;
366 ell[3+nskip1] = q2;
367 m11 = p1*q1;
368 m21 = p2*q1;
369 m22 = p2*q2;
370 Z11 += m11;
371 Z21 += m21;
372 Z22 += m22;
373 p1 = ell[4];
374 p2 = ell[4+nskip1];
375 dd = dee[4];
376 q1 = p1*dd;
377 q2 = p2*dd;
378 ell[4] = q1;
379 ell[4+nskip1] = q2;
380 m11 = p1*q1;
381 m21 = p2*q1;
382 m22 = p2*q2;
383 Z11 += m11;
384 Z21 += m21;
385 Z22 += m22;
386 p1 = ell[5];
387 p2 = ell[5+nskip1];
388 dd = dee[5];
389 q1 = p1*dd;
390 q2 = p2*dd;
391 ell[5] = q1;
392 ell[5+nskip1] = q2;
393 m11 = p1*q1;
394 m21 = p2*q1;
395 m22 = p2*q2;
396 Z11 += m11;
397 Z21 += m21;
398 Z22 += m22;
399 ell += 6;
400 dee += 6;
401 }
402 /* compute left-over iterations */
403 j += 6;
404 for (; j > 0; j--) {
405 p1 = ell[0];
406 p2 = ell[nskip1];
407 dd = dee[0];
408 q1 = p1*dd;
409 q2 = p2*dd;
410 ell[0] = q1;
411 ell[nskip1] = q2;
412 m11 = p1*q1;
413 m21 = p2*q1;
414 m22 = p2*q2;
415 Z11 += m11;
416 Z21 += m21;
417 Z22 += m22;
418 ell++;
419 dee++;
420 }
421 /* solve for diagonal 2 x 2 block at A(i,i) */
422 Z11 = ell[0] - Z11;
423 Z21 = ell[nskip1] - Z21;
424 Z22 = ell[1+nskip1] - Z22;
425 dee = d + i;
426 /* factorize 2 x 2 block Z,dee */
427 /* factorize row 1 */
428 dee[0] = btRecip(Z11);
429 /* factorize row 2 */
430 sum = 0;
431 q1 = Z21;
432 q2 = q1 * dee[0];
433 Z21 = q2;
434 sum += q1*q2;
435 dee[1] = btRecip(Z22 - sum);
436 /* done factorizing 2 x 2 block */
437 ell[nskip1] = Z21;
438 }
439 /* compute the (less than 2) rows at the bottom */
440 switch (n-i) {
441 case 0:
442 break;
443
444 case 1:
445 btSolveL1_1 (A,A+i*nskip1,i,nskip1);
446 /* scale the elements in a 1 x i block at A(i,0), and also */
447 /* compute Z = the outer product matrix that we'll need. */
448 Z11 = 0;
449 ell = A+i*nskip1;
450 dee = d;
451 for (j=i-6; j >= 0; j -= 6) {
452 p1 = ell[0];
453 dd = dee[0];
454 q1 = p1*dd;
455 ell[0] = q1;
456 m11 = p1*q1;
457 Z11 += m11;
458 p1 = ell[1];
459 dd = dee[1];
460 q1 = p1*dd;
461 ell[1] = q1;
462 m11 = p1*q1;
463 Z11 += m11;
464 p1 = ell[2];
465 dd = dee[2];
466 q1 = p1*dd;
467 ell[2] = q1;
468 m11 = p1*q1;
469 Z11 += m11;
470 p1 = ell[3];
471 dd = dee[3];
472 q1 = p1*dd;
473 ell[3] = q1;
474 m11 = p1*q1;
475 Z11 += m11;
476 p1 = ell[4];
477 dd = dee[4];
478 q1 = p1*dd;
479 ell[4] = q1;
480 m11 = p1*q1;
481 Z11 += m11;
482 p1 = ell[5];
483 dd = dee[5];
484 q1 = p1*dd;
485 ell[5] = q1;
486 m11 = p1*q1;
487 Z11 += m11;
488 ell += 6;
489 dee += 6;
490 }
491 /* compute left-over iterations */
492 j += 6;
493 for (; j > 0; j--) {
494 p1 = ell[0];
495 dd = dee[0];
496 q1 = p1*dd;
497 ell[0] = q1;
498 m11 = p1*q1;
499 Z11 += m11;
500 ell++;
501 dee++;
502 }
503 /* solve for diagonal 1 x 1 block at A(i,i) */
504 Z11 = ell[0] - Z11;
505 dee = d + i;
506 /* factorize 1 x 1 block Z,dee */
507 /* factorize row 1 */
508 dee[0] = btRecip(Z11);
509 /* done factorizing 1 x 1 block */
510 break;
511
512 //default: *((char*)0)=0; /* this should never happen! */
513 }
514 }
515
516 /* solve L*X=B, with B containing 1 right hand sides.
517 * L is an n*n lower triangular matrix with ones on the diagonal.
518 * L is stored by rows and its leading dimension is lskip.
519 * B is an n*1 matrix that contains the right hand sides.
520 * B is stored by columns and its leading dimension is also lskip.
521 * B is overwritten with X.
522 * this processes blocks of 4*4.
523 * if this is in the factorizer source file, n must be a multiple of 4.
524 */
525
btSolveL1(const btScalar * L,btScalar * B,int n,int lskip1)526 void btSolveL1 (const btScalar *L, btScalar *B, int n, int lskip1)
527 {
528 /* declare variables - Z matrix, p and q vectors, etc */
529 btScalar Z11,Z21,Z31,Z41,p1,q1,p2,p3,p4,*ex;
530 const btScalar *ell;
531 int lskip2,lskip3,i,j;
532 /* compute lskip values */
533 lskip2 = 2*lskip1;
534 lskip3 = 3*lskip1;
535 /* compute all 4 x 1 blocks of X */
536 for (i=0; i <= n-4; i+=4) {
537 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
538 /* set the Z matrix to 0 */
539 Z11=0;
540 Z21=0;
541 Z31=0;
542 Z41=0;
543 ell = L + i*lskip1;
544 ex = B;
545 /* the inner loop that computes outer products and adds them to Z */
546 for (j=i-12; j >= 0; j -= 12) {
547 /* load p and q values */
548 p1=ell[0];
549 q1=ex[0];
550 p2=ell[lskip1];
551 p3=ell[lskip2];
552 p4=ell[lskip3];
553 /* compute outer product and add it to the Z matrix */
554 Z11 += p1 * q1;
555 Z21 += p2 * q1;
556 Z31 += p3 * q1;
557 Z41 += p4 * q1;
558 /* load p and q values */
559 p1=ell[1];
560 q1=ex[1];
561 p2=ell[1+lskip1];
562 p3=ell[1+lskip2];
563 p4=ell[1+lskip3];
564 /* compute outer product and add it to the Z matrix */
565 Z11 += p1 * q1;
566 Z21 += p2 * q1;
567 Z31 += p3 * q1;
568 Z41 += p4 * q1;
569 /* load p and q values */
570 p1=ell[2];
571 q1=ex[2];
572 p2=ell[2+lskip1];
573 p3=ell[2+lskip2];
574 p4=ell[2+lskip3];
575 /* compute outer product and add it to the Z matrix */
576 Z11 += p1 * q1;
577 Z21 += p2 * q1;
578 Z31 += p3 * q1;
579 Z41 += p4 * q1;
580 /* load p and q values */
581 p1=ell[3];
582 q1=ex[3];
583 p2=ell[3+lskip1];
584 p3=ell[3+lskip2];
585 p4=ell[3+lskip3];
586 /* compute outer product and add it to the Z matrix */
587 Z11 += p1 * q1;
588 Z21 += p2 * q1;
589 Z31 += p3 * q1;
590 Z41 += p4 * q1;
591 /* load p and q values */
592 p1=ell[4];
593 q1=ex[4];
594 p2=ell[4+lskip1];
595 p3=ell[4+lskip2];
596 p4=ell[4+lskip3];
597 /* compute outer product and add it to the Z matrix */
598 Z11 += p1 * q1;
599 Z21 += p2 * q1;
600 Z31 += p3 * q1;
601 Z41 += p4 * q1;
602 /* load p and q values */
603 p1=ell[5];
604 q1=ex[5];
605 p2=ell[5+lskip1];
606 p3=ell[5+lskip2];
607 p4=ell[5+lskip3];
608 /* compute outer product and add it to the Z matrix */
609 Z11 += p1 * q1;
610 Z21 += p2 * q1;
611 Z31 += p3 * q1;
612 Z41 += p4 * q1;
613 /* load p and q values */
614 p1=ell[6];
615 q1=ex[6];
616 p2=ell[6+lskip1];
617 p3=ell[6+lskip2];
618 p4=ell[6+lskip3];
619 /* compute outer product and add it to the Z matrix */
620 Z11 += p1 * q1;
621 Z21 += p2 * q1;
622 Z31 += p3 * q1;
623 Z41 += p4 * q1;
624 /* load p and q values */
625 p1=ell[7];
626 q1=ex[7];
627 p2=ell[7+lskip1];
628 p3=ell[7+lskip2];
629 p4=ell[7+lskip3];
630 /* compute outer product and add it to the Z matrix */
631 Z11 += p1 * q1;
632 Z21 += p2 * q1;
633 Z31 += p3 * q1;
634 Z41 += p4 * q1;
635 /* load p and q values */
636 p1=ell[8];
637 q1=ex[8];
638 p2=ell[8+lskip1];
639 p3=ell[8+lskip2];
640 p4=ell[8+lskip3];
641 /* compute outer product and add it to the Z matrix */
642 Z11 += p1 * q1;
643 Z21 += p2 * q1;
644 Z31 += p3 * q1;
645 Z41 += p4 * q1;
646 /* load p and q values */
647 p1=ell[9];
648 q1=ex[9];
649 p2=ell[9+lskip1];
650 p3=ell[9+lskip2];
651 p4=ell[9+lskip3];
652 /* compute outer product and add it to the Z matrix */
653 Z11 += p1 * q1;
654 Z21 += p2 * q1;
655 Z31 += p3 * q1;
656 Z41 += p4 * q1;
657 /* load p and q values */
658 p1=ell[10];
659 q1=ex[10];
660 p2=ell[10+lskip1];
661 p3=ell[10+lskip2];
662 p4=ell[10+lskip3];
663 /* compute outer product and add it to the Z matrix */
664 Z11 += p1 * q1;
665 Z21 += p2 * q1;
666 Z31 += p3 * q1;
667 Z41 += p4 * q1;
668 /* load p and q values */
669 p1=ell[11];
670 q1=ex[11];
671 p2=ell[11+lskip1];
672 p3=ell[11+lskip2];
673 p4=ell[11+lskip3];
674 /* compute outer product and add it to the Z matrix */
675 Z11 += p1 * q1;
676 Z21 += p2 * q1;
677 Z31 += p3 * q1;
678 Z41 += p4 * q1;
679 /* advance pointers */
680 ell += 12;
681 ex += 12;
682 /* end of inner loop */
683 }
684 /* compute left-over iterations */
685 j += 12;
686 for (; j > 0; j--) {
687 /* load p and q values */
688 p1=ell[0];
689 q1=ex[0];
690 p2=ell[lskip1];
691 p3=ell[lskip2];
692 p4=ell[lskip3];
693 /* compute outer product and add it to the Z matrix */
694 Z11 += p1 * q1;
695 Z21 += p2 * q1;
696 Z31 += p3 * q1;
697 Z41 += p4 * q1;
698 /* advance pointers */
699 ell += 1;
700 ex += 1;
701 }
702 /* finish computing the X(i) block */
703 Z11 = ex[0] - Z11;
704 ex[0] = Z11;
705 p1 = ell[lskip1];
706 Z21 = ex[1] - Z21 - p1*Z11;
707 ex[1] = Z21;
708 p1 = ell[lskip2];
709 p2 = ell[1+lskip2];
710 Z31 = ex[2] - Z31 - p1*Z11 - p2*Z21;
711 ex[2] = Z31;
712 p1 = ell[lskip3];
713 p2 = ell[1+lskip3];
714 p3 = ell[2+lskip3];
715 Z41 = ex[3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
716 ex[3] = Z41;
717 /* end of outer loop */
718 }
719 /* compute rows at end that are not a multiple of block size */
720 for (; i < n; i++) {
721 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
722 /* set the Z matrix to 0 */
723 Z11=0;
724 ell = L + i*lskip1;
725 ex = B;
726 /* the inner loop that computes outer products and adds them to Z */
727 for (j=i-12; j >= 0; j -= 12) {
728 /* load p and q values */
729 p1=ell[0];
730 q1=ex[0];
731 /* compute outer product and add it to the Z matrix */
732 Z11 += p1 * q1;
733 /* load p and q values */
734 p1=ell[1];
735 q1=ex[1];
736 /* compute outer product and add it to the Z matrix */
737 Z11 += p1 * q1;
738 /* load p and q values */
739 p1=ell[2];
740 q1=ex[2];
741 /* compute outer product and add it to the Z matrix */
742 Z11 += p1 * q1;
743 /* load p and q values */
744 p1=ell[3];
745 q1=ex[3];
746 /* compute outer product and add it to the Z matrix */
747 Z11 += p1 * q1;
748 /* load p and q values */
749 p1=ell[4];
750 q1=ex[4];
751 /* compute outer product and add it to the Z matrix */
752 Z11 += p1 * q1;
753 /* load p and q values */
754 p1=ell[5];
755 q1=ex[5];
756 /* compute outer product and add it to the Z matrix */
757 Z11 += p1 * q1;
758 /* load p and q values */
759 p1=ell[6];
760 q1=ex[6];
761 /* compute outer product and add it to the Z matrix */
762 Z11 += p1 * q1;
763 /* load p and q values */
764 p1=ell[7];
765 q1=ex[7];
766 /* compute outer product and add it to the Z matrix */
767 Z11 += p1 * q1;
768 /* load p and q values */
769 p1=ell[8];
770 q1=ex[8];
771 /* compute outer product and add it to the Z matrix */
772 Z11 += p1 * q1;
773 /* load p and q values */
774 p1=ell[9];
775 q1=ex[9];
776 /* compute outer product and add it to the Z matrix */
777 Z11 += p1 * q1;
778 /* load p and q values */
779 p1=ell[10];
780 q1=ex[10];
781 /* compute outer product and add it to the Z matrix */
782 Z11 += p1 * q1;
783 /* load p and q values */
784 p1=ell[11];
785 q1=ex[11];
786 /* compute outer product and add it to the Z matrix */
787 Z11 += p1 * q1;
788 /* advance pointers */
789 ell += 12;
790 ex += 12;
791 /* end of inner loop */
792 }
793 /* compute left-over iterations */
794 j += 12;
795 for (; j > 0; j--) {
796 /* load p and q values */
797 p1=ell[0];
798 q1=ex[0];
799 /* compute outer product and add it to the Z matrix */
800 Z11 += p1 * q1;
801 /* advance pointers */
802 ell += 1;
803 ex += 1;
804 }
805 /* finish computing the X(i) block */
806 Z11 = ex[0] - Z11;
807 ex[0] = Z11;
808 }
809 }
810
811 /* solve L^T * x=b, with b containing 1 right hand side.
812 * L is an n*n lower triangular matrix with ones on the diagonal.
813 * L is stored by rows and its leading dimension is lskip.
814 * b is an n*1 matrix that contains the right hand side.
815 * b is overwritten with x.
816 * this processes blocks of 4.
817 */
818
btSolveL1T(const btScalar * L,btScalar * B,int n,int lskip1)819 void btSolveL1T (const btScalar *L, btScalar *B, int n, int lskip1)
820 {
821 /* declare variables - Z matrix, p and q vectors, etc */
822 btScalar Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex;
823 const btScalar *ell;
824 int lskip2,i,j;
825 // int lskip3;
826 /* special handling for L and B because we're solving L1 *transpose* */
827 L = L + (n-1)*(lskip1+1);
828 B = B + n-1;
829 lskip1 = -lskip1;
830 /* compute lskip values */
831 lskip2 = 2*lskip1;
832 //lskip3 = 3*lskip1;
833 /* compute all 4 x 1 blocks of X */
834 for (i=0; i <= n-4; i+=4) {
835 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
836 /* set the Z matrix to 0 */
837 Z11=0;
838 Z21=0;
839 Z31=0;
840 Z41=0;
841 ell = L - i;
842 ex = B;
843 /* the inner loop that computes outer products and adds them to Z */
844 for (j=i-4; j >= 0; j -= 4) {
845 /* load p and q values */
846 p1=ell[0];
847 q1=ex[0];
848 p2=ell[-1];
849 p3=ell[-2];
850 p4=ell[-3];
851 /* compute outer product and add it to the Z matrix */
852 m11 = p1 * q1;
853 m21 = p2 * q1;
854 m31 = p3 * q1;
855 m41 = p4 * q1;
856 ell += lskip1;
857 Z11 += m11;
858 Z21 += m21;
859 Z31 += m31;
860 Z41 += m41;
861 /* load p and q values */
862 p1=ell[0];
863 q1=ex[-1];
864 p2=ell[-1];
865 p3=ell[-2];
866 p4=ell[-3];
867 /* compute outer product and add it to the Z matrix */
868 m11 = p1 * q1;
869 m21 = p2 * q1;
870 m31 = p3 * q1;
871 m41 = p4 * q1;
872 ell += lskip1;
873 Z11 += m11;
874 Z21 += m21;
875 Z31 += m31;
876 Z41 += m41;
877 /* load p and q values */
878 p1=ell[0];
879 q1=ex[-2];
880 p2=ell[-1];
881 p3=ell[-2];
882 p4=ell[-3];
883 /* compute outer product and add it to the Z matrix */
884 m11 = p1 * q1;
885 m21 = p2 * q1;
886 m31 = p3 * q1;
887 m41 = p4 * q1;
888 ell += lskip1;
889 Z11 += m11;
890 Z21 += m21;
891 Z31 += m31;
892 Z41 += m41;
893 /* load p and q values */
894 p1=ell[0];
895 q1=ex[-3];
896 p2=ell[-1];
897 p3=ell[-2];
898 p4=ell[-3];
899 /* compute outer product and add it to the Z matrix */
900 m11 = p1 * q1;
901 m21 = p2 * q1;
902 m31 = p3 * q1;
903 m41 = p4 * q1;
904 ell += lskip1;
905 ex -= 4;
906 Z11 += m11;
907 Z21 += m21;
908 Z31 += m31;
909 Z41 += m41;
910 /* end of inner loop */
911 }
912 /* compute left-over iterations */
913 j += 4;
914 for (; j > 0; j--) {
915 /* load p and q values */
916 p1=ell[0];
917 q1=ex[0];
918 p2=ell[-1];
919 p3=ell[-2];
920 p4=ell[-3];
921 /* compute outer product and add it to the Z matrix */
922 m11 = p1 * q1;
923 m21 = p2 * q1;
924 m31 = p3 * q1;
925 m41 = p4 * q1;
926 ell += lskip1;
927 ex -= 1;
928 Z11 += m11;
929 Z21 += m21;
930 Z31 += m31;
931 Z41 += m41;
932 }
933 /* finish computing the X(i) block */
934 Z11 = ex[0] - Z11;
935 ex[0] = Z11;
936 p1 = ell[-1];
937 Z21 = ex[-1] - Z21 - p1*Z11;
938 ex[-1] = Z21;
939 p1 = ell[-2];
940 p2 = ell[-2+lskip1];
941 Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21;
942 ex[-2] = Z31;
943 p1 = ell[-3];
944 p2 = ell[-3+lskip1];
945 p3 = ell[-3+lskip2];
946 Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
947 ex[-3] = Z41;
948 /* end of outer loop */
949 }
950 /* compute rows at end that are not a multiple of block size */
951 for (; i < n; i++) {
952 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
953 /* set the Z matrix to 0 */
954 Z11=0;
955 ell = L - i;
956 ex = B;
957 /* the inner loop that computes outer products and adds them to Z */
958 for (j=i-4; j >= 0; j -= 4) {
959 /* load p and q values */
960 p1=ell[0];
961 q1=ex[0];
962 /* compute outer product and add it to the Z matrix */
963 m11 = p1 * q1;
964 ell += lskip1;
965 Z11 += m11;
966 /* load p and q values */
967 p1=ell[0];
968 q1=ex[-1];
969 /* compute outer product and add it to the Z matrix */
970 m11 = p1 * q1;
971 ell += lskip1;
972 Z11 += m11;
973 /* load p and q values */
974 p1=ell[0];
975 q1=ex[-2];
976 /* compute outer product and add it to the Z matrix */
977 m11 = p1 * q1;
978 ell += lskip1;
979 Z11 += m11;
980 /* load p and q values */
981 p1=ell[0];
982 q1=ex[-3];
983 /* compute outer product and add it to the Z matrix */
984 m11 = p1 * q1;
985 ell += lskip1;
986 ex -= 4;
987 Z11 += m11;
988 /* end of inner loop */
989 }
990 /* compute left-over iterations */
991 j += 4;
992 for (; j > 0; j--) {
993 /* load p and q values */
994 p1=ell[0];
995 q1=ex[0];
996 /* compute outer product and add it to the Z matrix */
997 m11 = p1 * q1;
998 ell += lskip1;
999 ex -= 1;
1000 Z11 += m11;
1001 }
1002 /* finish computing the X(i) block */
1003 Z11 = ex[0] - Z11;
1004 ex[0] = Z11;
1005 }
1006 }
1007
1008
1009
btVectorScale(btScalar * a,const btScalar * d,int n)1010 void btVectorScale (btScalar *a, const btScalar *d, int n)
1011 {
1012 btAssert (a && d && n >= 0);
1013 for (int i=0; i<n; i++) {
1014 a[i] *= d[i];
1015 }
1016 }
1017
btSolveLDLT(const btScalar * L,const btScalar * d,btScalar * b,int n,int nskip)1018 void btSolveLDLT (const btScalar *L, const btScalar *d, btScalar *b, int n, int nskip)
1019 {
1020 btAssert (L && d && b && n > 0 && nskip >= n);
1021 btSolveL1 (L,b,n,nskip);
1022 btVectorScale (b,d,n);
1023 btSolveL1T (L,b,n,nskip);
1024 }
1025
1026
1027
1028 //***************************************************************************
1029
1030 // swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
1031 // A is nskip. this only references and swaps the lower triangle.
1032 // if `do_fast_row_swaps' is nonzero and row pointers are being used, then
1033 // rows will be swapped by exchanging row pointers. otherwise the data will
1034 // be copied.
1035
btSwapRowsAndCols(BTATYPE A,int n,int i1,int i2,int nskip,int do_fast_row_swaps)1036 static void btSwapRowsAndCols (BTATYPE A, int n, int i1, int i2, int nskip,
1037 int do_fast_row_swaps)
1038 {
1039 btAssert (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
1040 nskip >= n && i1 < i2);
1041
1042 # ifdef BTROWPTRS
1043 btScalar *A_i1 = A[i1];
1044 btScalar *A_i2 = A[i2];
1045 for (int i=i1+1; i<i2; ++i) {
1046 btScalar *A_i_i1 = A[i] + i1;
1047 A_i1[i] = *A_i_i1;
1048 *A_i_i1 = A_i2[i];
1049 }
1050 A_i1[i2] = A_i1[i1];
1051 A_i1[i1] = A_i2[i1];
1052 A_i2[i1] = A_i2[i2];
1053 // swap rows, by swapping row pointers
1054 if (do_fast_row_swaps) {
1055 A[i1] = A_i2;
1056 A[i2] = A_i1;
1057 }
1058 else {
1059 // Only swap till i2 column to match A plain storage variant.
1060 for (int k = 0; k <= i2; ++k) {
1061 btScalar tmp = A_i1[k];
1062 A_i1[k] = A_i2[k];
1063 A_i2[k] = tmp;
1064 }
1065 }
1066 // swap columns the hard way
1067 for (int j=i2+1; j<n; ++j) {
1068 btScalar *A_j = A[j];
1069 btScalar tmp = A_j[i1];
1070 A_j[i1] = A_j[i2];
1071 A_j[i2] = tmp;
1072 }
1073 # else
1074 btScalar *A_i1 = A+i1*nskip;
1075 btScalar *A_i2 = A+i2*nskip;
1076 for (int k = 0; k < i1; ++k) {
1077 btScalar tmp = A_i1[k];
1078 A_i1[k] = A_i2[k];
1079 A_i2[k] = tmp;
1080 }
1081 btScalar *A_i = A_i1 + nskip;
1082 for (int i=i1+1; i<i2; A_i+=nskip, ++i) {
1083 btScalar tmp = A_i2[i];
1084 A_i2[i] = A_i[i1];
1085 A_i[i1] = tmp;
1086 }
1087 {
1088 btScalar tmp = A_i1[i1];
1089 A_i1[i1] = A_i2[i2];
1090 A_i2[i2] = tmp;
1091 }
1092 btScalar *A_j = A_i2 + nskip;
1093 for (int j=i2+1; j<n; A_j+=nskip, ++j) {
1094 btScalar tmp = A_j[i1];
1095 A_j[i1] = A_j[i2];
1096 A_j[i2] = tmp;
1097 }
1098 # endif
1099 }
1100
1101
1102 // swap two indexes in the n*n LCP problem. i1 must be <= i2.
1103
btSwapProblem(BTATYPE A,btScalar * x,btScalar * b,btScalar * w,btScalar * lo,btScalar * hi,int * p,bool * state,int * findex,int n,int i1,int i2,int nskip,int do_fast_row_swaps)1104 static void btSwapProblem (BTATYPE A, btScalar *x, btScalar *b, btScalar *w, btScalar *lo,
1105 btScalar *hi, int *p, bool *state, int *findex,
1106 int n, int i1, int i2, int nskip,
1107 int do_fast_row_swaps)
1108 {
1109 btScalar tmpr;
1110 int tmpi;
1111 bool tmpb;
1112 btAssert (n>0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
1113 if (i1==i2) return;
1114
1115 btSwapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps);
1116
1117 tmpr = x[i1];
1118 x[i1] = x[i2];
1119 x[i2] = tmpr;
1120
1121 tmpr = b[i1];
1122 b[i1] = b[i2];
1123 b[i2] = tmpr;
1124
1125 tmpr = w[i1];
1126 w[i1] = w[i2];
1127 w[i2] = tmpr;
1128
1129 tmpr = lo[i1];
1130 lo[i1] = lo[i2];
1131 lo[i2] = tmpr;
1132
1133 tmpr = hi[i1];
1134 hi[i1] = hi[i2];
1135 hi[i2] = tmpr;
1136
1137 tmpi = p[i1];
1138 p[i1] = p[i2];
1139 p[i2] = tmpi;
1140
1141 tmpb = state[i1];
1142 state[i1] = state[i2];
1143 state[i2] = tmpb;
1144
1145 if (findex) {
1146 tmpi = findex[i1];
1147 findex[i1] = findex[i2];
1148 findex[i2] = tmpi;
1149 }
1150 }
1151
1152
1153
1154
1155 //***************************************************************************
1156 // btLCP manipulator object. this represents an n*n LCP problem.
1157 //
1158 // two index sets C and N are kept. each set holds a subset of
1159 // the variable indexes 0..n-1. an index can only be in one set.
1160 // initially both sets are empty.
1161 //
1162 // the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
1163
1164 //***************************************************************************
1165 // fast implementation of btLCP. see the above definition of btLCP for
1166 // interface comments.
1167 //
1168 // `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
1169 // permuted as the other vectors/matrices are permuted.
1170 //
1171 // A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
1172 // contiguous indexes. the don't-care indexes follow N.
1173 //
1174 // an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
1175 // added or removed from the set C the factorization is updated.
1176 // thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
1177 // the leading dimension of the matrix L is always `nskip'.
1178 //
1179 // at the start there may be other indexes that are unbounded but are not
1180 // included in `nub'. btLCP will permute the matrix so that absolutely all
1181 // unbounded vectors are at the start. thus there may be some initial
1182 // permutation.
1183 //
1184 // the algorithms here assume certain patterns, particularly with respect to
1185 // index transfer.
1186
1187 #ifdef btLCP_FAST
1188
1189 struct btLCP
1190 {
1191 const int m_n;
1192 const int m_nskip;
1193 int m_nub;
1194 int m_nC, m_nN; // size of each index set
1195 BTATYPE const m_A; // A rows
1196 btScalar *const m_x, * const m_b, *const m_w, *const m_lo,* const m_hi; // permuted LCP problem data
1197 btScalar *const m_L, *const m_d; // L*D*L' factorization of set C
1198 btScalar *const m_Dell, *const m_ell, *const m_tmp;
1199 bool *const m_state;
1200 int *const m_findex, *const m_p, *const m_C;
1201
1202 btLCP (int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
1203 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d,
1204 btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
1205 bool *_state, int *_findex, int *p, int *c, btScalar **Arows);
getNubbtLCP1206 int getNub() const { return m_nub; }
1207 void transfer_i_to_C (int i);
transfer_i_to_NbtLCP1208 void transfer_i_to_N (int i) { m_nN++; } // because we can assume C and N span 1:i-1
1209 void transfer_i_from_N_to_C (int i);
1210 void transfer_i_from_C_to_N (int i, btAlignedObjectArray<btScalar>& scratch);
numCbtLCP1211 int numC() const { return m_nC; }
numNbtLCP1212 int numN() const { return m_nN; }
indexCbtLCP1213 int indexC (int i) const { return i; }
indexNbtLCP1214 int indexN (int i) const { return i+m_nC; }
AiibtLCP1215 btScalar Aii (int i) const { return BTAROW(i)[i]; }
AiC_times_qCbtLCP1216 btScalar AiC_times_qC (int i, btScalar *q) const { return btLargeDot (BTAROW(i), q, m_nC); }
AiN_times_qNbtLCP1217 btScalar AiN_times_qN (int i, btScalar *q) const { return btLargeDot (BTAROW(i)+m_nC, q+m_nC, m_nN); }
1218 void pN_equals_ANC_times_qC (btScalar *p, btScalar *q);
1219 void pN_plusequals_ANi (btScalar *p, int i, int sign=1);
1220 void pC_plusequals_s_times_qC (btScalar *p, btScalar s, btScalar *q);
1221 void pN_plusequals_s_times_qN (btScalar *p, btScalar s, btScalar *q);
1222 void solve1 (btScalar *a, int i, int dir=1, int only_transfer=0);
1223 void unpermute();
1224 };
1225
1226
btLCP(int _n,int _nskip,int _nub,btScalar * _Adata,btScalar * _x,btScalar * _b,btScalar * _w,btScalar * _lo,btScalar * _hi,btScalar * l,btScalar * _d,btScalar * _Dell,btScalar * _ell,btScalar * _tmp,bool * _state,int * _findex,int * p,int * c,btScalar ** Arows)1227 btLCP::btLCP (int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
1228 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d,
1229 btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
1230 bool *_state, int *_findex, int *p, int *c, btScalar **Arows):
1231 m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0),
1232 # ifdef BTROWPTRS
1233 m_A(Arows),
1234 #else
1235 m_A(_Adata),
1236 #endif
1237 m_x(_x), m_b(_b), m_w(_w), m_lo(_lo), m_hi(_hi),
1238 m_L(l), m_d(_d), m_Dell(_Dell), m_ell(_ell), m_tmp(_tmp),
1239 m_state(_state), m_findex(_findex), m_p(p), m_C(c)
1240 {
1241 {
1242 btSetZero (m_x,m_n);
1243 }
1244
1245 {
1246 # ifdef BTROWPTRS
1247 // make matrix row pointers
1248 btScalar *aptr = _Adata;
1249 BTATYPE A = m_A;
1250 const int n = m_n, nskip = m_nskip;
1251 for (int k=0; k<n; aptr+=nskip, ++k) A[k] = aptr;
1252 # endif
1253 }
1254
1255 {
1256 int *p = m_p;
1257 const int n = m_n;
1258 for (int k=0; k<n; ++k) p[k]=k; // initially unpermuted
1259 }
1260
1261 /*
1262 // for testing, we can do some random swaps in the area i > nub
1263 {
1264 const int n = m_n;
1265 const int nub = m_nub;
1266 if (nub < n) {
1267 for (int k=0; k<100; k++) {
1268 int i1,i2;
1269 do {
1270 i1 = dRandInt(n-nub)+nub;
1271 i2 = dRandInt(n-nub)+nub;
1272 }
1273 while (i1 > i2);
1274 //printf ("--> %d %d\n",i1,i2);
1275 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,n,i1,i2,m_nskip,0);
1276 }
1277 }
1278 */
1279
1280 // permute the problem so that *all* the unbounded variables are at the
1281 // start, i.e. look for unbounded variables not included in `nub'. we can
1282 // potentially push up `nub' this way and get a bigger initial factorization.
1283 // note that when we swap rows/cols here we must not just swap row pointers,
1284 // as the initial factorization relies on the data being all in one chunk.
1285 // variables that have findex >= 0 are *not* considered to be unbounded even
1286 // if lo=-inf and hi=inf - this is because these limits may change during the
1287 // solution process.
1288
1289 {
1290 int *findex = m_findex;
1291 btScalar *lo = m_lo, *hi = m_hi;
1292 const int n = m_n;
1293 for (int k = m_nub; k<n; ++k) {
1294 if (findex && findex[k] >= 0) continue;
1295 if (lo[k]==-BT_INFINITY && hi[k]==BT_INFINITY) {
1296 btSwapProblem (m_A,m_x,m_b,m_w,lo,hi,m_p,m_state,findex,n,m_nub,k,m_nskip,0);
1297 m_nub++;
1298 }
1299 }
1300 }
1301
1302 // if there are unbounded variables at the start, factorize A up to that
1303 // point and solve for x. this puts all indexes 0..nub-1 into C.
1304 if (m_nub > 0) {
1305 const int nub = m_nub;
1306 {
1307 btScalar *Lrow = m_L;
1308 const int nskip = m_nskip;
1309 for (int j=0; j<nub; Lrow+=nskip, ++j) memcpy(Lrow,BTAROW(j),(j+1)*sizeof(btScalar));
1310 }
1311 btFactorLDLT (m_L,m_d,nub,m_nskip);
1312 memcpy (m_x,m_b,nub*sizeof(btScalar));
1313 btSolveLDLT (m_L,m_d,m_x,nub,m_nskip);
1314 btSetZero (m_w,nub);
1315 {
1316 int *C = m_C;
1317 for (int k=0; k<nub; ++k) C[k] = k;
1318 }
1319 m_nC = nub;
1320 }
1321
1322 // permute the indexes > nub such that all findex variables are at the end
1323 if (m_findex) {
1324 const int nub = m_nub;
1325 int *findex = m_findex;
1326 int num_at_end = 0;
1327 for (int k=m_n-1; k >= nub; k--) {
1328 if (findex[k] >= 0) {
1329 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,findex,m_n,k,m_n-1-num_at_end,m_nskip,1);
1330 num_at_end++;
1331 }
1332 }
1333 }
1334
1335 // print info about indexes
1336 /*
1337 {
1338 const int n = m_n;
1339 const int nub = m_nub;
1340 for (int k=0; k<n; k++) {
1341 if (k<nub) printf ("C");
1342 else if (m_lo[k]==-BT_INFINITY && m_hi[k]==BT_INFINITY) printf ("c");
1343 else printf (".");
1344 }
1345 printf ("\n");
1346 }
1347 */
1348 }
1349
1350
transfer_i_to_C(int i)1351 void btLCP::transfer_i_to_C (int i)
1352 {
1353 {
1354 if (m_nC > 0) {
1355 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
1356 {
1357 const int nC = m_nC;
1358 btScalar *const Ltgt = m_L + nC*m_nskip, *ell = m_ell;
1359 for (int j=0; j<nC; ++j) Ltgt[j] = ell[j];
1360 }
1361 const int nC = m_nC;
1362 m_d[nC] = btRecip (BTAROW(i)[i] - btLargeDot(m_ell,m_Dell,nC));
1363 }
1364 else {
1365 m_d[0] = btRecip (BTAROW(i)[i]);
1366 }
1367
1368 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,m_nC,i,m_nskip,1);
1369
1370 const int nC = m_nC;
1371 m_C[nC] = nC;
1372 m_nC = nC + 1; // nC value is outdated after this line
1373 }
1374
1375 }
1376
1377
transfer_i_from_N_to_C(int i)1378 void btLCP::transfer_i_from_N_to_C (int i)
1379 {
1380 {
1381 if (m_nC > 0) {
1382 {
1383 btScalar *const aptr = BTAROW(i);
1384 btScalar *Dell = m_Dell;
1385 const int *C = m_C;
1386 # ifdef BTNUB_OPTIMIZATIONS
1387 // if nub>0, initial part of aptr unpermuted
1388 const int nub = m_nub;
1389 int j=0;
1390 for ( ; j<nub; ++j) Dell[j] = aptr[j];
1391 const int nC = m_nC;
1392 for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
1393 # else
1394 const int nC = m_nC;
1395 for (int j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
1396 # endif
1397 }
1398 btSolveL1 (m_L,m_Dell,m_nC,m_nskip);
1399 {
1400 const int nC = m_nC;
1401 btScalar *const Ltgt = m_L + nC*m_nskip;
1402 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
1403 for (int j=0; j<nC; ++j) Ltgt[j] = ell[j] = Dell[j] * d[j];
1404 }
1405 const int nC = m_nC;
1406 m_d[nC] = btRecip (BTAROW(i)[i] - btLargeDot(m_ell,m_Dell,nC));
1407 }
1408 else {
1409 m_d[0] = btRecip (BTAROW(i)[i]);
1410 }
1411
1412 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,m_nC,i,m_nskip,1);
1413
1414 const int nC = m_nC;
1415 m_C[nC] = nC;
1416 m_nN--;
1417 m_nC = nC + 1; // nC value is outdated after this line
1418 }
1419
1420 // @@@ TO DO LATER
1421 // if we just finish here then we'll go back and re-solve for
1422 // delta_x. but actually we can be more efficient and incrementally
1423 // update delta_x here. but if we do this, we wont have ell and Dell
1424 // to use in updating the factorization later.
1425
1426 }
1427
btRemoveRowCol(btScalar * A,int n,int nskip,int r)1428 void btRemoveRowCol (btScalar *A, int n, int nskip, int r)
1429 {
1430 btAssert(A && n > 0 && nskip >= n && r >= 0 && r < n);
1431 if (r >= n-1) return;
1432 if (r > 0) {
1433 {
1434 const size_t move_size = (n-r-1)*sizeof(btScalar);
1435 btScalar *Adst = A + r;
1436 for (int i=0; i<r; Adst+=nskip,++i) {
1437 btScalar *Asrc = Adst + 1;
1438 memmove (Adst,Asrc,move_size);
1439 }
1440 }
1441 {
1442 const size_t cpy_size = r*sizeof(btScalar);
1443 btScalar *Adst = A + r * nskip;
1444 for (int i=r; i<(n-1); ++i) {
1445 btScalar *Asrc = Adst + nskip;
1446 memcpy (Adst,Asrc,cpy_size);
1447 Adst = Asrc;
1448 }
1449 }
1450 }
1451 {
1452 const size_t cpy_size = (n-r-1)*sizeof(btScalar);
1453 btScalar *Adst = A + r * (nskip + 1);
1454 for (int i=r; i<(n-1); ++i) {
1455 btScalar *Asrc = Adst + (nskip + 1);
1456 memcpy (Adst,Asrc,cpy_size);
1457 Adst = Asrc - 1;
1458 }
1459 }
1460 }
1461
1462
1463
1464
btLDLTAddTL(btScalar * L,btScalar * d,const btScalar * a,int n,int nskip,btAlignedObjectArray<btScalar> & scratch)1465 void btLDLTAddTL (btScalar *L, btScalar *d, const btScalar *a, int n, int nskip, btAlignedObjectArray<btScalar>& scratch)
1466 {
1467 btAssert (L && d && a && n > 0 && nskip >= n);
1468
1469 if (n < 2) return;
1470 scratch.resize(2*nskip);
1471 btScalar *W1 = &scratch[0];
1472
1473 btScalar *W2 = W1 + nskip;
1474
1475 W1[0] = btScalar(0.0);
1476 W2[0] = btScalar(0.0);
1477 for (int j=1; j<n; ++j) {
1478 W1[j] = W2[j] = (btScalar) (a[j] * SIMDSQRT12);
1479 }
1480 btScalar W11 = (btScalar) ((btScalar(0.5)*a[0]+1)*SIMDSQRT12);
1481 btScalar W21 = (btScalar) ((btScalar(0.5)*a[0]-1)*SIMDSQRT12);
1482
1483 btScalar alpha1 = btScalar(1.0);
1484 btScalar alpha2 = btScalar(1.0);
1485
1486 {
1487 btScalar dee = d[0];
1488 btScalar alphanew = alpha1 + (W11*W11)*dee;
1489 btAssert(alphanew != btScalar(0.0));
1490 dee /= alphanew;
1491 btScalar gamma1 = W11 * dee;
1492 dee *= alpha1;
1493 alpha1 = alphanew;
1494 alphanew = alpha2 - (W21*W21)*dee;
1495 dee /= alphanew;
1496 //btScalar gamma2 = W21 * dee;
1497 alpha2 = alphanew;
1498 btScalar k1 = btScalar(1.0) - W21*gamma1;
1499 btScalar k2 = W21*gamma1*W11 - W21;
1500 btScalar *ll = L + nskip;
1501 for (int p=1; p<n; ll+=nskip, ++p) {
1502 btScalar Wp = W1[p];
1503 btScalar ell = *ll;
1504 W1[p] = Wp - W11*ell;
1505 W2[p] = k1*Wp + k2*ell;
1506 }
1507 }
1508
1509 btScalar *ll = L + (nskip + 1);
1510 for (int j=1; j<n; ll+=nskip+1, ++j) {
1511 btScalar k1 = W1[j];
1512 btScalar k2 = W2[j];
1513
1514 btScalar dee = d[j];
1515 btScalar alphanew = alpha1 + (k1*k1)*dee;
1516 btAssert(alphanew != btScalar(0.0));
1517 dee /= alphanew;
1518 btScalar gamma1 = k1 * dee;
1519 dee *= alpha1;
1520 alpha1 = alphanew;
1521 alphanew = alpha2 - (k2*k2)*dee;
1522 dee /= alphanew;
1523 btScalar gamma2 = k2 * dee;
1524 dee *= alpha2;
1525 d[j] = dee;
1526 alpha2 = alphanew;
1527
1528 btScalar *l = ll + nskip;
1529 for (int p=j+1; p<n; l+=nskip, ++p) {
1530 btScalar ell = *l;
1531 btScalar Wp = W1[p] - k1 * ell;
1532 ell += gamma1 * Wp;
1533 W1[p] = Wp;
1534 Wp = W2[p] - k2 * ell;
1535 ell -= gamma2 * Wp;
1536 W2[p] = Wp;
1537 *l = ell;
1538 }
1539 }
1540 }
1541
1542
1543 #define _BTGETA(i,j) (A[i][j])
1544 //#define _GETA(i,j) (A[(i)*nskip+(j)])
1545 #define BTGETA(i,j) ((i > j) ? _BTGETA(i,j) : _BTGETA(j,i))
1546
btEstimateLDLTAddTLTmpbufSize(int nskip)1547 inline size_t btEstimateLDLTAddTLTmpbufSize(int nskip)
1548 {
1549 return nskip * 2 * sizeof(btScalar);
1550 }
1551
1552
btLDLTRemove(btScalar ** A,const int * p,btScalar * L,btScalar * d,int n1,int n2,int r,int nskip,btAlignedObjectArray<btScalar> & scratch)1553 void btLDLTRemove (btScalar **A, const int *p, btScalar *L, btScalar *d,
1554 int n1, int n2, int r, int nskip, btAlignedObjectArray<btScalar>& scratch)
1555 {
1556 btAssert(A && p && L && d && n1 > 0 && n2 > 0 && r >= 0 && r < n2 &&
1557 n1 >= n2 && nskip >= n1);
1558 #ifdef BT_DEBUG
1559 for (int i=0; i<n2; ++i)
1560 btAssert(p[i] >= 0 && p[i] < n1);
1561 #endif
1562
1563 if (r==n2-1) {
1564 return; // deleting last row/col is easy
1565 }
1566 else {
1567 size_t LDLTAddTL_size = btEstimateLDLTAddTLTmpbufSize(nskip);
1568 btAssert(LDLTAddTL_size % sizeof(btScalar) == 0);
1569 scratch.resize(nskip * 2+n2);
1570 btScalar *tmp = &scratch[0];
1571 if (r==0) {
1572 btScalar *a = (btScalar *)((char *)tmp + LDLTAddTL_size);
1573 const int p_0 = p[0];
1574 for (int i=0; i<n2; ++i) {
1575 a[i] = -BTGETA(p[i],p_0);
1576 }
1577 a[0] += btScalar(1.0);
1578 btLDLTAddTL (L,d,a,n2,nskip,scratch);
1579 }
1580 else {
1581 btScalar *t = (btScalar *)((char *)tmp + LDLTAddTL_size);
1582 {
1583 btScalar *Lcurr = L + r*nskip;
1584 for (int i=0; i<r; ++Lcurr, ++i) {
1585 btAssert(d[i] != btScalar(0.0));
1586 t[i] = *Lcurr / d[i];
1587 }
1588 }
1589 btScalar *a = t + r;
1590 {
1591 btScalar *Lcurr = L + r*nskip;
1592 const int *pp_r = p + r, p_r = *pp_r;
1593 const int n2_minus_r = n2-r;
1594 for (int i=0; i<n2_minus_r; Lcurr+=nskip,++i) {
1595 a[i] = btLargeDot(Lcurr,t,r) - BTGETA(pp_r[i],p_r);
1596 }
1597 }
1598 a[0] += btScalar(1.0);
1599 btLDLTAddTL (L + r*nskip+r, d+r, a, n2-r, nskip, scratch);
1600 }
1601 }
1602
1603 // snip out row/column r from L and d
1604 btRemoveRowCol (L,n2,nskip,r);
1605 if (r < (n2-1)) memmove (d+r,d+r+1,(n2-r-1)*sizeof(btScalar));
1606 }
1607
1608
transfer_i_from_C_to_N(int i,btAlignedObjectArray<btScalar> & scratch)1609 void btLCP::transfer_i_from_C_to_N (int i, btAlignedObjectArray<btScalar>& scratch)
1610 {
1611 {
1612 int *C = m_C;
1613 // remove a row/column from the factorization, and adjust the
1614 // indexes (black magic!)
1615 int last_idx = -1;
1616 const int nC = m_nC;
1617 int j = 0;
1618 for ( ; j<nC; ++j) {
1619 if (C[j]==nC-1) {
1620 last_idx = j;
1621 }
1622 if (C[j]==i) {
1623 btLDLTRemove (m_A,C,m_L,m_d,m_n,nC,j,m_nskip,scratch);
1624 int k;
1625 if (last_idx == -1) {
1626 for (k=j+1 ; k<nC; ++k) {
1627 if (C[k]==nC-1) {
1628 break;
1629 }
1630 }
1631 btAssert (k < nC);
1632 }
1633 else {
1634 k = last_idx;
1635 }
1636 C[k] = C[j];
1637 if (j < (nC-1)) memmove (C+j,C+j+1,(nC-j-1)*sizeof(int));
1638 break;
1639 }
1640 }
1641 btAssert (j < nC);
1642
1643 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,m_n,i,nC-1,m_nskip,1);
1644
1645 m_nN++;
1646 m_nC = nC - 1; // nC value is outdated after this line
1647 }
1648
1649 }
1650
1651
pN_equals_ANC_times_qC(btScalar * p,btScalar * q)1652 void btLCP::pN_equals_ANC_times_qC (btScalar *p, btScalar *q)
1653 {
1654 // we could try to make this matrix-vector multiplication faster using
1655 // outer product matrix tricks, e.g. with the dMultidotX() functions.
1656 // but i tried it and it actually made things slower on random 100x100
1657 // problems because of the overhead involved. so we'll stick with the
1658 // simple method for now.
1659 const int nC = m_nC;
1660 btScalar *ptgt = p + nC;
1661 const int nN = m_nN;
1662 for (int i=0; i<nN; ++i) {
1663 ptgt[i] = btLargeDot (BTAROW(i+nC),q,nC);
1664 }
1665 }
1666
1667
pN_plusequals_ANi(btScalar * p,int i,int sign)1668 void btLCP::pN_plusequals_ANi (btScalar *p, int i, int sign)
1669 {
1670 const int nC = m_nC;
1671 btScalar *aptr = BTAROW(i) + nC;
1672 btScalar *ptgt = p + nC;
1673 if (sign > 0) {
1674 const int nN = m_nN;
1675 for (int j=0; j<nN; ++j) ptgt[j] += aptr[j];
1676 }
1677 else {
1678 const int nN = m_nN;
1679 for (int j=0; j<nN; ++j) ptgt[j] -= aptr[j];
1680 }
1681 }
1682
pC_plusequals_s_times_qC(btScalar * p,btScalar s,btScalar * q)1683 void btLCP::pC_plusequals_s_times_qC (btScalar *p, btScalar s, btScalar *q)
1684 {
1685 const int nC = m_nC;
1686 for (int i=0; i<nC; ++i) {
1687 p[i] += s*q[i];
1688 }
1689 }
1690
pN_plusequals_s_times_qN(btScalar * p,btScalar s,btScalar * q)1691 void btLCP::pN_plusequals_s_times_qN (btScalar *p, btScalar s, btScalar *q)
1692 {
1693 const int nC = m_nC;
1694 btScalar *ptgt = p + nC, *qsrc = q + nC;
1695 const int nN = m_nN;
1696 for (int i=0; i<nN; ++i) {
1697 ptgt[i] += s*qsrc[i];
1698 }
1699 }
1700
solve1(btScalar * a,int i,int dir,int only_transfer)1701 void btLCP::solve1 (btScalar *a, int i, int dir, int only_transfer)
1702 {
1703 // the `Dell' and `ell' that are computed here are saved. if index i is
1704 // later added to the factorization then they can be reused.
1705 //
1706 // @@@ question: do we need to solve for entire delta_x??? yes, but
1707 // only if an x goes below 0 during the step.
1708
1709 if (m_nC > 0) {
1710 {
1711 btScalar *Dell = m_Dell;
1712 int *C = m_C;
1713 btScalar *aptr = BTAROW(i);
1714 # ifdef BTNUB_OPTIMIZATIONS
1715 // if nub>0, initial part of aptr[] is guaranteed unpermuted
1716 const int nub = m_nub;
1717 int j=0;
1718 for ( ; j<nub; ++j) Dell[j] = aptr[j];
1719 const int nC = m_nC;
1720 for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
1721 # else
1722 const int nC = m_nC;
1723 for (int j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
1724 # endif
1725 }
1726 btSolveL1 (m_L,m_Dell,m_nC,m_nskip);
1727 {
1728 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
1729 const int nC = m_nC;
1730 for (int j=0; j<nC; ++j) ell[j] = Dell[j] * d[j];
1731 }
1732
1733 if (!only_transfer) {
1734 btScalar *tmp = m_tmp, *ell = m_ell;
1735 {
1736 const int nC = m_nC;
1737 for (int j=0; j<nC; ++j) tmp[j] = ell[j];
1738 }
1739 btSolveL1T (m_L,tmp,m_nC,m_nskip);
1740 if (dir > 0) {
1741 int *C = m_C;
1742 btScalar *tmp = m_tmp;
1743 const int nC = m_nC;
1744 for (int j=0; j<nC; ++j) a[C[j]] = -tmp[j];
1745 } else {
1746 int *C = m_C;
1747 btScalar *tmp = m_tmp;
1748 const int nC = m_nC;
1749 for (int j=0; j<nC; ++j) a[C[j]] = tmp[j];
1750 }
1751 }
1752 }
1753 }
1754
1755
unpermute()1756 void btLCP::unpermute()
1757 {
1758 // now we have to un-permute x and w
1759 {
1760 memcpy (m_tmp,m_x,m_n*sizeof(btScalar));
1761 btScalar *x = m_x, *tmp = m_tmp;
1762 const int *p = m_p;
1763 const int n = m_n;
1764 for (int j=0; j<n; ++j) x[p[j]] = tmp[j];
1765 }
1766 {
1767 memcpy (m_tmp,m_w,m_n*sizeof(btScalar));
1768 btScalar *w = m_w, *tmp = m_tmp;
1769 const int *p = m_p;
1770 const int n = m_n;
1771 for (int j=0; j<n; ++j) w[p[j]] = tmp[j];
1772 }
1773 }
1774
1775 #endif // btLCP_FAST
1776
1777
1778 //***************************************************************************
1779 // an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
1780
btSolveDantzigLCP(int n,btScalar * A,btScalar * x,btScalar * b,btScalar * outer_w,int nub,btScalar * lo,btScalar * hi,int * findex,btDantzigScratchMemory & scratchMem)1781 bool btSolveDantzigLCP (int n, btScalar *A, btScalar *x, btScalar *b,
1782 btScalar* outer_w, int nub, btScalar *lo, btScalar *hi, int *findex, btDantzigScratchMemory& scratchMem)
1783 {
1784 s_error = false;
1785
1786 // printf("btSolveDantzigLCP n=%d\n",n);
1787 btAssert (n>0 && A && x && b && lo && hi && nub >= 0 && nub <= n);
1788 btAssert(outer_w);
1789
1790 #ifdef BT_DEBUG
1791 {
1792 // check restrictions on lo and hi
1793 for (int k=0; k<n; ++k)
1794 btAssert (lo[k] <= 0 && hi[k] >= 0);
1795 }
1796 # endif
1797
1798
1799 // if all the variables are unbounded then we can just factor, solve,
1800 // and return
1801 if (nub >= n)
1802 {
1803
1804
1805 int nskip = (n);
1806 btFactorLDLT (A, outer_w, n, nskip);
1807 btSolveLDLT (A, outer_w, b, n, nskip);
1808 memcpy (x, b, n*sizeof(btScalar));
1809
1810 return !s_error;
1811 }
1812
1813 const int nskip = (n);
1814 scratchMem.L.resize(n*nskip);
1815
1816 scratchMem.d.resize(n);
1817
1818 btScalar *w = outer_w;
1819 scratchMem.delta_w.resize(n);
1820 scratchMem.delta_x.resize(n);
1821 scratchMem.Dell.resize(n);
1822 scratchMem.ell.resize(n);
1823 scratchMem.Arows.resize(n);
1824 scratchMem.p.resize(n);
1825 scratchMem.C.resize(n);
1826
1827 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
1828 scratchMem.state.resize(n);
1829
1830
1831 // create LCP object. note that tmp is set to delta_w to save space, this
1832 // optimization relies on knowledge of how tmp is used, so be careful!
1833 btLCP lcp(n,nskip,nub,A,x,b,w,lo,hi,&scratchMem.L[0],&scratchMem.d[0],&scratchMem.Dell[0],&scratchMem.ell[0],&scratchMem.delta_w[0],&scratchMem.state[0],findex,&scratchMem.p[0],&scratchMem.C[0],&scratchMem.Arows[0]);
1834 int adj_nub = lcp.getNub();
1835
1836 // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
1837 // LCP conditions then i is added to the appropriate index set. otherwise
1838 // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
1839 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
1840 // while driving x(i) we maintain the LCP conditions on the other variables
1841 // 0..i-1. we do this by watching out for other x(i),w(i) values going
1842 // outside the valid region, and then switching them between index sets
1843 // when that happens.
1844
1845 bool hit_first_friction_index = false;
1846 for (int i=adj_nub; i<n; ++i)
1847 {
1848 s_error = false;
1849 // the index i is the driving index and indexes i+1..n-1 are "dont care",
1850 // i.e. when we make changes to the system those x's will be zero and we
1851 // don't care what happens to those w's. in other words, we only consider
1852 // an (i+1)*(i+1) sub-problem of A*x=b+w.
1853
1854 // if we've hit the first friction index, we have to compute the lo and
1855 // hi values based on the values of x already computed. we have been
1856 // permuting the indexes, so the values stored in the findex vector are
1857 // no longer valid. thus we have to temporarily unpermute the x vector.
1858 // for the purposes of this computation, 0*infinity = 0 ... so if the
1859 // contact constraint's normal force is 0, there should be no tangential
1860 // force applied.
1861
1862 if (!hit_first_friction_index && findex && findex[i] >= 0) {
1863 // un-permute x into delta_w, which is not being used at the moment
1864 for (int j=0; j<n; ++j) scratchMem.delta_w[scratchMem.p[j]] = x[j];
1865
1866 // set lo and hi values
1867 for (int k=i; k<n; ++k) {
1868 btScalar wfk = scratchMem.delta_w[findex[k]];
1869 if (wfk == 0) {
1870 hi[k] = 0;
1871 lo[k] = 0;
1872 }
1873 else {
1874 hi[k] = btFabs (hi[k] * wfk);
1875 lo[k] = -hi[k];
1876 }
1877 }
1878 hit_first_friction_index = true;
1879 }
1880
1881 // thus far we have not even been computing the w values for indexes
1882 // greater than i, so compute w[i] now.
1883 w[i] = lcp.AiC_times_qC (i,x) + lcp.AiN_times_qN (i,x) - b[i];
1884
1885 // if lo=hi=0 (which can happen for tangential friction when normals are
1886 // 0) then the index will be assigned to set N with some state. however,
1887 // set C's line has zero size, so the index will always remain in set N.
1888 // with the "normal" switching logic, if w changed sign then the index
1889 // would have to switch to set C and then back to set N with an inverted
1890 // state. this is pointless, and also computationally expensive. to
1891 // prevent this from happening, we use the rule that indexes with lo=hi=0
1892 // will never be checked for set changes. this means that the state for
1893 // these indexes may be incorrect, but that doesn't matter.
1894
1895 // see if x(i),w(i) is in a valid region
1896 if (lo[i]==0 && w[i] >= 0) {
1897 lcp.transfer_i_to_N (i);
1898 scratchMem.state[i] = false;
1899 }
1900 else if (hi[i]==0 && w[i] <= 0) {
1901 lcp.transfer_i_to_N (i);
1902 scratchMem.state[i] = true;
1903 }
1904 else if (w[i]==0) {
1905 // this is a degenerate case. by the time we get to this test we know
1906 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
1907 // and similarly that hi > 0. this means that the line segment
1908 // corresponding to set C is at least finite in extent, and we are on it.
1909 // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
1910 lcp.solve1 (&scratchMem.delta_x[0],i,0,1);
1911
1912 lcp.transfer_i_to_C (i);
1913 }
1914 else {
1915 // we must push x(i) and w(i)
1916 for (;;) {
1917 int dir;
1918 btScalar dirf;
1919 // find direction to push on x(i)
1920 if (w[i] <= 0) {
1921 dir = 1;
1922 dirf = btScalar(1.0);
1923 }
1924 else {
1925 dir = -1;
1926 dirf = btScalar(-1.0);
1927 }
1928
1929 // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
1930 lcp.solve1 (&scratchMem.delta_x[0],i,dir);
1931
1932 // note that delta_x[i] = dirf, but we wont bother to set it
1933
1934 // compute: delta_w = A*delta_x ... note we only care about
1935 // delta_w(N) and delta_w(i), the rest is ignored
1936 lcp.pN_equals_ANC_times_qC (&scratchMem.delta_w[0],&scratchMem.delta_x[0]);
1937 lcp.pN_plusequals_ANi (&scratchMem.delta_w[0],i,dir);
1938 scratchMem.delta_w[i] = lcp.AiC_times_qC (i,&scratchMem.delta_x[0]) + lcp.Aii(i)*dirf;
1939
1940 // find largest step we can take (size=s), either to drive x(i),w(i)
1941 // to the valid LCP region or to drive an already-valid variable
1942 // outside the valid region.
1943
1944 int cmd = 1; // index switching command
1945 int si = 0; // si = index to switch if cmd>3
1946 btScalar s = -w[i]/scratchMem.delta_w[i];
1947 if (dir > 0) {
1948 if (hi[i] < BT_INFINITY) {
1949 btScalar s2 = (hi[i]-x[i])*dirf; // was (hi[i]-x[i])/dirf // step to x(i)=hi(i)
1950 if (s2 < s) {
1951 s = s2;
1952 cmd = 3;
1953 }
1954 }
1955 }
1956 else {
1957 if (lo[i] > -BT_INFINITY) {
1958 btScalar s2 = (lo[i]-x[i])*dirf; // was (lo[i]-x[i])/dirf // step to x(i)=lo(i)
1959 if (s2 < s) {
1960 s = s2;
1961 cmd = 2;
1962 }
1963 }
1964 }
1965
1966 {
1967 const int numN = lcp.numN();
1968 for (int k=0; k < numN; ++k) {
1969 const int indexN_k = lcp.indexN(k);
1970 if (!scratchMem.state[indexN_k] ? scratchMem.delta_w[indexN_k] < 0 : scratchMem.delta_w[indexN_k] > 0) {
1971 // don't bother checking if lo=hi=0
1972 if (lo[indexN_k] == 0 && hi[indexN_k] == 0) continue;
1973 btScalar s2 = -w[indexN_k] / scratchMem.delta_w[indexN_k];
1974 if (s2 < s) {
1975 s = s2;
1976 cmd = 4;
1977 si = indexN_k;
1978 }
1979 }
1980 }
1981 }
1982
1983 {
1984 const int numC = lcp.numC();
1985 for (int k=adj_nub; k < numC; ++k) {
1986 const int indexC_k = lcp.indexC(k);
1987 if (scratchMem.delta_x[indexC_k] < 0 && lo[indexC_k] > -BT_INFINITY) {
1988 btScalar s2 = (lo[indexC_k]-x[indexC_k]) / scratchMem.delta_x[indexC_k];
1989 if (s2 < s) {
1990 s = s2;
1991 cmd = 5;
1992 si = indexC_k;
1993 }
1994 }
1995 if (scratchMem.delta_x[indexC_k] > 0 && hi[indexC_k] < BT_INFINITY) {
1996 btScalar s2 = (hi[indexC_k]-x[indexC_k]) / scratchMem.delta_x[indexC_k];
1997 if (s2 < s) {
1998 s = s2;
1999 cmd = 6;
2000 si = indexC_k;
2001 }
2002 }
2003 }
2004 }
2005
2006 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
2007 // "C->NL","C->NH"};
2008 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
2009
2010 // if s <= 0 then we've got a problem. if we just keep going then
2011 // we're going to get stuck in an infinite loop. instead, just cross
2012 // our fingers and exit with the current solution.
2013 if (s <= btScalar(0.0))
2014 {
2015 // printf("LCP internal error, s <= 0 (s=%.4e)",(double)s);
2016 if (i < n) {
2017 btSetZero (x+i,n-i);
2018 btSetZero (w+i,n-i);
2019 }
2020 s_error = true;
2021 break;
2022 }
2023
2024 // apply x = x + s * delta_x
2025 lcp.pC_plusequals_s_times_qC (x, s, &scratchMem.delta_x[0]);
2026 x[i] += s * dirf;
2027
2028 // apply w = w + s * delta_w
2029 lcp.pN_plusequals_s_times_qN (w, s, &scratchMem.delta_w[0]);
2030 w[i] += s * scratchMem.delta_w[i];
2031
2032 // void *tmpbuf;
2033 // switch indexes between sets if necessary
2034 switch (cmd) {
2035 case 1: // done
2036 w[i] = 0;
2037 lcp.transfer_i_to_C (i);
2038 break;
2039 case 2: // done
2040 x[i] = lo[i];
2041 scratchMem.state[i] = false;
2042 lcp.transfer_i_to_N (i);
2043 break;
2044 case 3: // done
2045 x[i] = hi[i];
2046 scratchMem.state[i] = true;
2047 lcp.transfer_i_to_N (i);
2048 break;
2049 case 4: // keep going
2050 w[si] = 0;
2051 lcp.transfer_i_from_N_to_C (si);
2052 break;
2053 case 5: // keep going
2054 x[si] = lo[si];
2055 scratchMem.state[si] = false;
2056 lcp.transfer_i_from_C_to_N (si, scratchMem.m_scratch);
2057 break;
2058 case 6: // keep going
2059 x[si] = hi[si];
2060 scratchMem.state[si] = true;
2061 lcp.transfer_i_from_C_to_N (si, scratchMem.m_scratch);
2062 break;
2063 }
2064
2065 if (cmd <= 3) break;
2066 } // for (;;)
2067 } // else
2068
2069 if (s_error)
2070 {
2071 break;
2072 }
2073 } // for (int i=adj_nub; i<n; ++i)
2074
2075 lcp.unpermute();
2076
2077
2078 return !s_error;
2079 }
2080
2081