• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2  * All rights reserved.
3  *
4  * This package is an SSL implementation written
5  * by Eric Young (eay@cryptsoft.com).
6  * The implementation was written so as to conform with Netscapes SSL.
7  *
8  * This library is free for commercial and non-commercial use as long as
9  * the following conditions are aheared to.  The following conditions
10  * apply to all code found in this distribution, be it the RC4, RSA,
11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12  * included with this distribution is covered by the same copyright terms
13  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14  *
15  * Copyright remains Eric Young's, and as such any Copyright notices in
16  * the code are not to be removed.
17  * If this package is used in a product, Eric Young should be given attribution
18  * as the author of the parts of the library used.
19  * This can be in the form of a textual message at program startup or
20  * in documentation (online or textual) provided with the package.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  * 3. All advertising materials mentioning features or use of this software
31  *    must display the following acknowledgement:
32  *    "This product includes cryptographic software written by
33  *     Eric Young (eay@cryptsoft.com)"
34  *    The word 'cryptographic' can be left out if the rouines from the library
35  *    being used are not cryptographic related :-).
36  * 4. If you include any Windows specific code (or a derivative thereof) from
37  *    the apps directory (application code) you must include an acknowledgement:
38  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39  *
40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  * The licence and distribution terms for any publically available version or
53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
54  * copied and put under another distribution licence
55  * [including the GNU Public Licence.] */
56 
57 #include <openssl/bn.h>
58 
59 #include <assert.h>
60 
61 #include "internal.h"
62 
63 
64 /* This file has two other implementations: x86 assembly language in
65  * asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c. */
66 #if defined(OPENSSL_NO_ASM) || \
67     !(defined(OPENSSL_X86) || (defined(OPENSSL_X86_64) && defined(__GNUC__)))
68 
69 #ifdef BN_ULLONG
/* mul_add(r, a, w, c) forms the double-width value w * a + r + c and splits
 * it with Lw/Hw: the Lw part is written back to |r| and the Hw part to |c|.
 * |r| and |c| must be lvalues; both are read before they are overwritten. */
#define mul_add(r, a, w, c)                              \
  do {                                                   \
    BN_ULLONG wide = (BN_ULLONG)(w) * (a) + (r) + (c);   \
    (r) = Lw(wide);                                      \
    (c) = Hw(wide);                                      \
  } while (0)
77 
/* mul(r, a, w, c) forms the double-width value w * a + c and splits it with
 * Lw/Hw: the Lw part is written to |r| and the Hw part replaces |c|. The
 * previous contents of |r| are not read. */
#define mul(r, a, w, c)                            \
  do {                                             \
    BN_ULLONG wide = (BN_ULLONG)(w) * (a) + (c);   \
    (r) = Lw(wide);                                \
    (c) = Hw(wide);                                \
  } while (0)
85 
/* sqr(r0, r1, a) forms the double-width square a * a and stores its Lw part
 * in |r0| and its Hw part in |r1|. Note that |a| is expanded twice. */
#define sqr(r0, r1, a)                       \
  do {                                       \
    BN_ULLONG wide = (BN_ULLONG)(a) * (a);   \
    (r0) = Lw(wide);                         \
    (r1) = Hw(wide);                         \
  } while (0)
93 
94 #else
95 
/* mul_add(r, a, w, c) is the single-word-arithmetic form of
 * (c, r) = w * a + r + c. The product is obtained through BN_UMULT_LOHI
 * (presumably low word first, then high word -- confirm against its
 * definition); the two additions into the low word are performed separately
 * and each wrap-around is detected by an unsigned comparison and folded into
 * the new |c|. |r| and |c| must be lvalues. */
#define mul_add(r, a, w, c)                        \
  do {                                             \
    BN_ULONG prod_lo, prod_hi;                     \
    BN_ULONG factor = (a);                         \
    BN_ULONG sum = (r);                            \
    BN_UMULT_LOHI(prod_lo, prod_hi, w, factor);    \
    sum += (c);                                    \
    /* sum < c exactly when r + c wrapped. */      \
    (c) = prod_hi + ((sum < (c)) ? 1 : 0);         \
    sum += prod_lo;                                \
    /* sum < prod_lo exactly when this wrapped. */ \
    (c) += (sum < prod_lo) ? 1 : 0;                \
    (r) = sum;                                     \
  } while (0)
108 
/* mul(r, a, w, c) is the single-word-arithmetic form of (c, r) = w * a + c.
 * The product comes from BN_UMULT_LOHI; the incoming |c| is added to the low
 * word and a wrap-around of that addition is added to the high word, which
 * becomes the new |c|. The previous contents of |r| are not read. */
#define mul(r, a, w, c)                            \
  do {                                             \
    BN_ULONG prod_lo, prod_hi;                     \
    BN_ULONG factor = (a);                         \
    BN_ULONG sum;                                  \
    BN_UMULT_LOHI(prod_lo, prod_hi, w, factor);    \
    sum = prod_lo + (c);                           \
    (c) = prod_hi + ((sum < prod_lo) ? 1 : 0);     \
    (r) = sum;                                     \
  } while (0)
118 
/* sqr(r0, r1, a) copies |a| into a local and hands it to BN_UMULT_LOHI as
 * both multiplicands, with |r0| and |r1| as the first and second output
 * arguments. |a| is evaluated once. */
#define sqr(r0, r1, a)                         \
  do {                                         \
    BN_ULONG operand = (a);                    \
    BN_UMULT_LOHI(r0, r1, operand, operand);   \
  } while (0)
124 
125 #endif /* !BN_ULLONG */
126 
/* bn_mul_add_words applies mul_add to each of the |num| word pairs
 * (rp[i], ap[i]) in increasing index order, with |w| as the multiplier and a
 * single carry word threaded through every step. It returns the carry left
 * after the last word, or zero when |num| is not positive. */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
                          BN_ULONG w) {
  BN_ULONG carry = 0;
  int i = 0;

  assert(num >= 0);
  if (num <= 0) {
    return carry;
  }

  /* Bulk of the work, four words per iteration. */
  for (; num - i >= 4; i += 4) {
    mul_add(rp[i], ap[i], w, carry);
    mul_add(rp[i + 1], ap[i + 1], w, carry);
    mul_add(rp[i + 2], ap[i + 2], w, carry);
    mul_add(rp[i + 3], ap[i + 3], w, carry);
  }

  /* Between zero and three words remain. */
  for (; i < num; i++) {
    mul_add(rp[i], ap[i], w, carry);
  }

  return carry;
}
155 
/* bn_mul_words applies mul to each of the |num| words of |ap| in increasing
 * index order, storing the results in |rp| and threading one carry word
 * through every step with |w| as the multiplier. It returns the carry left
 * after the last word, or zero when |num| is not positive. */
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) {
  BN_ULONG carry = 0;
  int i = 0;

  assert(num >= 0);
  if (num <= 0) {
    return carry;
  }

  /* Bulk of the work, four words per iteration. */
  for (; num - i >= 4; i += 4) {
    mul(rp[i], ap[i], w, carry);
    mul(rp[i + 1], ap[i + 1], w, carry);
    mul(rp[i + 2], ap[i + 2], w, carry);
    mul(rp[i + 3], ap[i + 3], w, carry);
  }

  /* Between zero and three words remain. */
  for (; i < num; i++) {
    mul(rp[i], ap[i], w, carry);
  }

  return carry;
}
181 
/* bn_sqr_words applies sqr to each of the |n| words of |a| in increasing
 * index order. The two output words for a[i] are stored in r[2 * i] and
 * r[2 * i + 1], so |r| receives 2 * n words in total. Nothing is written
 * when |n| is not positive. */
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) {
  int i = 0;

  assert(n >= 0);
  if (n <= 0) {
    return;
  }

  /* Four input words (eight output words) per iteration. */
  for (; n - i >= 4; i += 4) {
    sqr(r[2 * i], r[2 * i + 1], a[i]);
    sqr(r[2 * i + 2], r[2 * i + 3], a[i + 1]);
    sqr(r[2 * i + 4], r[2 * i + 5], a[i + 2]);
    sqr(r[2 * i + 6], r[2 * i + 7], a[i + 3]);
  }

  /* Between zero and three input words remain. */
  for (; i < n; i++) {
    sqr(r[2 * i], r[2 * i + 1], a[i]);
  }
}
204 
205 #ifdef BN_ULLONG
/* bn_add_words adds the |n|-word arrays |a| and |b| word by word in
 * increasing index order, storing each result word in |r|. A double-width
 * accumulator holds the running sum; after each word is stored, the
 * accumulator is shifted right by BN_BITS2 so that only the carry remains.
 * It returns what is left in the accumulator after the last word, or zero
 * when |n| is not positive. */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      int n) {
  BN_ULLONG acc = 0;
  int i = 0;

  assert(n >= 0);
  if (n <= 0) {
    return (BN_ULONG)0;
  }

  /* Four words per iteration. */
  for (; n - i >= 4; i += 4) {
    acc += (BN_ULLONG)a[i] + b[i];
    r[i] = (BN_ULONG)acc & BN_MASK2;
    acc >>= BN_BITS2;

    acc += (BN_ULLONG)a[i + 1] + b[i + 1];
    r[i + 1] = (BN_ULONG)acc & BN_MASK2;
    acc >>= BN_BITS2;

    acc += (BN_ULLONG)a[i + 2] + b[i + 2];
    r[i + 2] = (BN_ULONG)acc & BN_MASK2;
    acc >>= BN_BITS2;

    acc += (BN_ULLONG)a[i + 3] + b[i + 3];
    r[i + 3] = (BN_ULONG)acc & BN_MASK2;
    acc >>= BN_BITS2;
  }

  /* Between zero and three words remain. */
  for (; i < n; i++) {
    acc += (BN_ULLONG)a[i] + b[i];
    r[i] = (BN_ULONG)acc & BN_MASK2;
    acc >>= BN_BITS2;
  }

  return (BN_ULONG)acc;
}
244 
245 #else /* !BN_ULLONG */
246 
/* bn_add_words adds the |n|-word arrays |a| and |b| word by word in
 * increasing index order, storing each result word in |r|, using only
 * single-word arithmetic. For every word the incoming carry is first added
 * to a[i] and then b[i] is added to that; each addition's wrap-around is
 * detected with an unsigned comparison and becomes part of the outgoing
 * carry. It returns the carry left after the last word, or zero when |n| is
 * not positive. */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      int n) {
  BN_ULONG carry, partial, total;
  int i = 0;

  assert(n >= 0);
  if (n <= 0) {
    return (BN_ULONG)0;
  }

  carry = 0;

  /* Four words per iteration. */
  for (; n - i >= 4; i += 4) {
    partial = (a[i] + carry) & BN_MASK2;
    carry = (partial < carry);
    total = (partial + b[i]) & BN_MASK2;
    carry += (total < partial);
    r[i] = total;

    partial = (a[i + 1] + carry) & BN_MASK2;
    carry = (partial < carry);
    total = (partial + b[i + 1]) & BN_MASK2;
    carry += (total < partial);
    r[i + 1] = total;

    partial = (a[i + 2] + carry) & BN_MASK2;
    carry = (partial < carry);
    total = (partial + b[i + 2]) & BN_MASK2;
    carry += (total < partial);
    r[i + 2] = total;

    partial = (a[i + 3] + carry) & BN_MASK2;
    carry = (partial < carry);
    total = (partial + b[i + 3]) & BN_MASK2;
    carry += (total < partial);
    r[i + 3] = total;
  }

  /* Between zero and three words remain. */
  for (; i < n; i++) {
    partial = (a[i] + carry) & BN_MASK2;
    carry = (partial < carry);
    total = (partial + b[i]) & BN_MASK2;
    carry += (total < partial);
    r[i] = total;
  }

  return (BN_ULONG)carry;
}
301 
302 #endif /* !BN_ULLONG */
303 
/* bn_sub_words subtracts the |n|-word array |b| from |a| word by word in
 * increasing index order, storing each result word in |r|. The borrow is
 * tracked as 0 or 1: when the two input words differ, the new borrow is
 * simply whether a[i] < b[i]; when they are equal, the incoming borrow is
 * passed through unchanged. It returns the borrow left after the last word,
 * or zero when |n| is not positive. */
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      int n) {
  BN_ULONG minuend, subtrahend;
  int borrow = 0;
  int i = 0;

  assert(n >= 0);
  if (n <= 0) {
    return (BN_ULONG)0;
  }

  /* Four words per iteration. */
  for (; n - i >= 4; i += 4) {
    minuend = a[i];
    subtrahend = b[i];
    r[i] = (minuend - subtrahend - borrow) & BN_MASK2;
    if (minuend != subtrahend) {
      borrow = (minuend < subtrahend);
    }

    minuend = a[i + 1];
    subtrahend = b[i + 1];
    r[i + 1] = (minuend - subtrahend - borrow) & BN_MASK2;
    if (minuend != subtrahend) {
      borrow = (minuend < subtrahend);
    }

    minuend = a[i + 2];
    subtrahend = b[i + 2];
    r[i + 2] = (minuend - subtrahend - borrow) & BN_MASK2;
    if (minuend != subtrahend) {
      borrow = (minuend < subtrahend);
    }

    minuend = a[i + 3];
    subtrahend = b[i + 3];
    r[i + 3] = (minuend - subtrahend - borrow) & BN_MASK2;
    if (minuend != subtrahend) {
      borrow = (minuend < subtrahend);
    }
  }

  /* Between zero and three words remain. */
  for (; i < n; i++) {
    minuend = a[i];
    subtrahend = b[i];
    r[i] = (minuend - subtrahend - borrow) & BN_MASK2;
    if (minuend != subtrahend) {
      borrow = (minuend < subtrahend);
    }
  }

  return borrow;
}
358 
/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
363 
364 #ifdef BN_ULLONG
365 
366 /* Keep in mind that additions to multiplication result can not overflow,
367  * because its high half cannot be all-ones. */
/* mul_add_c(a, b, c0, c1, c2) adds the double-width product a * b to the
 * three-word accumulator (c2, c1, c0). The product plus |c0| is formed in one
 * double-width value; its Lw part becomes the new |c0|, its Hw part is added
 * to |c1|, and a wrap-around of that addition increments |c2|. */
#define mul_add_c(a, b, c0, c1, c2)                                  \
  do {                                                               \
    BN_ULLONG wide = (BN_ULLONG)(a) * (b) + (c0); /* cannot carry */ \
    BN_ULONG top = (BN_ULONG)Hw(wide);                               \
    (c0) = (BN_ULONG)Lw(wide);                                       \
    (c1) = ((c1) + top) & BN_MASK2;                                  \
    (c2) += ((c1) < top) ? 1 : 0;                                    \
  } while (0)
380 
/* mul_add_c2(a, b, c0, c1, c2) adds 2 * a * b to the three-word accumulator
 * (c2, c1, c0). The double-width product is computed once and then added to
 * the accumulator twice, each time using the same steps as mul_add_c. */
#define mul_add_c2(a, b, c0, c1, c2)                    \
  do {                                                  \
    BN_ULLONG prod = (BN_ULLONG)(a) * (b);              \
    BN_ULLONG wide;                                     \
    BN_ULONG top;                                       \
    /* First addition of the product. */                \
    wide = prod + (c0); /* cannot carry */              \
    top = (BN_ULONG)Hw(wide);                           \
    (c0) = (BN_ULONG)Lw(wide);                          \
    (c1) = ((c1) + top) & BN_MASK2;                     \
    (c2) += ((c1) < top) ? 1 : 0;                       \
    /* Second addition, against the updated c0. */      \
    wide = prod + (c0); /* cannot carry */              \
    top = (BN_ULONG)Hw(wide);                           \
    (c0) = (BN_ULONG)Lw(wide);                          \
    (c1) = ((c1) + top) & BN_MASK2;                     \
    (c2) += ((c1) < top) ? 1 : 0;                       \
  } while (0)
400 
/* sqr_add_c(a, i, c0, c1, c2) adds the double-width square a[i] * a[i] to the
 * three-word accumulator (c2, c1, c0), using the same steps as mul_add_c. */
#define sqr_add_c(a, i, c0, c1, c2)                                        \
  do {                                                                     \
    BN_ULLONG wide = (BN_ULLONG)(a)[i] * (a)[i] + (c0); /* cannot carry */ \
    BN_ULONG top = (BN_ULONG)Hw(wide);                                     \
    (c0) = (BN_ULONG)Lw(wide);                                             \
    (c1) = ((c1) + top) & BN_MASK2;                                        \
    (c2) += ((c1) < top) ? 1 : 0;                                          \
  } while (0)
413 
/* sqr_add_c2(a, i, j, c0, c1, c2) adds 2 * a[i] * a[j] to the three-word
 * accumulator (c2, c1, c0) by forwarding to mul_add_c2. */
#define sqr_add_c2(a, i, j, c0, c1, c2) \
  mul_add_c2((a)[i], (a)[j], c0, c1, c2)
415 
416 #else
417 
418 /* Keep in mind that additions to hi can not overflow, because the high word of
419  * a multiplication result cannot be all-ones. */
/* mul_add_c(a, b, c0, c1, c2) adds the product a * b to the three-word
 * accumulator (c2, c1, c0) using single-word arithmetic. The product words
 * come from BN_UMULT_LOHI; the low word is added to |c0|, a wrap-around of
 * that addition is folded into the high word, the high word is added to
 * |c1|, and a wrap-around of that addition is added to |c2|. */
#define mul_add_c(a, b, c0, c1, c2)                 \
  do {                                              \
    BN_ULONG lhs = (a);                             \
    BN_ULONG rhs = (b);                             \
    BN_ULONG prod_lo, prod_hi;                      \
    BN_UMULT_LOHI(prod_lo, prod_hi, lhs, rhs);      \
    (c0) += prod_lo;                                \
    prod_hi += ((c0) < prod_lo) ? 1 : 0;            \
    (c1) += prod_hi;                                \
    (c2) += ((c1) < prod_hi) ? 1 : 0;               \
  } while (0)
430 
/* mul_add_c2(a, b, c0, c1, c2) adds 2 * a * b to the three-word accumulator
 * (c2, c1, c0) using single-word arithmetic. The product words are obtained
 * once from BN_UMULT_LOHI and then added to the accumulator twice. The first
 * pass works on a copy of the high word (|tt|) so that the unmodified high
 * word is still available for the second pass. Every use of a macro
 * parameter is parenthesized. */
#define mul_add_c2(a, b, c0, c1, c2) \
  do {                               \
    BN_ULONG ta = (a), tb = (b);     \
    BN_ULONG lo, hi, tt;             \
    BN_UMULT_LOHI(lo, hi, ta, tb);   \
    /* First addition. */            \
    (c0) += lo;                      \
    tt = hi + (((c0) < lo) ? 1 : 0); \
    (c1) += tt;                      \
    (c2) += ((c1) < tt) ? 1 : 0;     \
    /* Second addition. */           \
    (c0) += lo;                      \
    hi += ((c0) < lo) ? 1 : 0;       \
    (c1) += hi;                      \
    (c2) += ((c1) < hi) ? 1 : 0;     \
  } while (0)
445 
/* sqr_add_c(a, i, c0, c1, c2) adds the square of a[i] to the three-word
 * accumulator (c2, c1, c0) using single-word arithmetic. The product words
 * come from BN_UMULT_LOHI with a[i] as both multiplicands; the low word is
 * added to |c0|, a wrap-around of that addition is folded into the high
 * word, the high word is added to |c1|, and a wrap-around of that addition
 * is added to |c2|. Every use of a macro parameter is parenthesized. */
#define sqr_add_c(a, i, c0, c1, c2) \
  do {                              \
    BN_ULONG ta = (a)[i];           \
    BN_ULONG lo, hi;                \
    BN_UMULT_LOHI(lo, hi, ta, ta);  \
    (c0) += lo;                     \
    hi += ((c0) < lo) ? 1 : 0;      \
    (c1) += hi;                     \
    (c2) += ((c1) < hi) ? 1 : 0;    \
  } while (0)
456 
/* sqr_add_c2(a, i, j, c0, c1, c2) adds 2 * a[i] * a[j] to the three-word
 * accumulator (c2, c1, c0) by forwarding to mul_add_c2. */
#define sqr_add_c2(a, i, j, c0, c1, c2) \
  mul_add_c2((a)[i], (a)[j], c0, c1, c2)
458 
459 #endif /* !BN_ULLONG */
460 
/* bn_mul_comba8 multiplies the eight-word arrays |a| and |b| and stores the
 * sixteen result words in r[0..15].
 *
 * The product is built one output word ("column") at a time: for column k,
 * every a[i] * b[k - i] is added into a three-word accumulator, the lowest
 * accumulator word is stored as r[k], and the remaining two words carry over
 * into the next column. The roles of c1, c2 and c3 rotate from column to
 * column so that no words need to be moved; the variable that was just
 * stored is cleared and becomes the top word of the next column. */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
  BN_ULONG c1 = 0, c2 = 0, c3 = 0;

  /* Column 0. */
  mul_add_c(a[0], b[0], c1, c2, c3);
  r[0] = c1;
  c1 = 0;

  /* Column 1. */
  mul_add_c(a[0], b[1], c2, c3, c1);
  mul_add_c(a[1], b[0], c2, c3, c1);
  r[1] = c2;
  c2 = 0;

  /* Column 2. */
  mul_add_c(a[0], b[2], c3, c1, c2);
  mul_add_c(a[1], b[1], c3, c1, c2);
  mul_add_c(a[2], b[0], c3, c1, c2);
  r[2] = c3;
  c3 = 0;

  /* Column 3. */
  mul_add_c(a[0], b[3], c1, c2, c3);
  mul_add_c(a[1], b[2], c1, c2, c3);
  mul_add_c(a[2], b[1], c1, c2, c3);
  mul_add_c(a[3], b[0], c1, c2, c3);
  r[3] = c1;
  c1 = 0;

  /* Column 4. */
  mul_add_c(a[0], b[4], c2, c3, c1);
  mul_add_c(a[1], b[3], c2, c3, c1);
  mul_add_c(a[2], b[2], c2, c3, c1);
  mul_add_c(a[3], b[1], c2, c3, c1);
  mul_add_c(a[4], b[0], c2, c3, c1);
  r[4] = c2;
  c2 = 0;

  /* Column 5. */
  mul_add_c(a[0], b[5], c3, c1, c2);
  mul_add_c(a[1], b[4], c3, c1, c2);
  mul_add_c(a[2], b[3], c3, c1, c2);
  mul_add_c(a[3], b[2], c3, c1, c2);
  mul_add_c(a[4], b[1], c3, c1, c2);
  mul_add_c(a[5], b[0], c3, c1, c2);
  r[5] = c3;
  c3 = 0;

  /* Column 6. */
  mul_add_c(a[0], b[6], c1, c2, c3);
  mul_add_c(a[1], b[5], c1, c2, c3);
  mul_add_c(a[2], b[4], c1, c2, c3);
  mul_add_c(a[3], b[3], c1, c2, c3);
  mul_add_c(a[4], b[2], c1, c2, c3);
  mul_add_c(a[5], b[1], c1, c2, c3);
  mul_add_c(a[6], b[0], c1, c2, c3);
  r[6] = c1;
  c1 = 0;

  /* Column 7. */
  mul_add_c(a[0], b[7], c2, c3, c1);
  mul_add_c(a[1], b[6], c2, c3, c1);
  mul_add_c(a[2], b[5], c2, c3, c1);
  mul_add_c(a[3], b[4], c2, c3, c1);
  mul_add_c(a[4], b[3], c2, c3, c1);
  mul_add_c(a[5], b[2], c2, c3, c1);
  mul_add_c(a[6], b[1], c2, c3, c1);
  mul_add_c(a[7], b[0], c2, c3, c1);
  r[7] = c2;
  c2 = 0;

  /* Column 8. */
  mul_add_c(a[1], b[7], c3, c1, c2);
  mul_add_c(a[2], b[6], c3, c1, c2);
  mul_add_c(a[3], b[5], c3, c1, c2);
  mul_add_c(a[4], b[4], c3, c1, c2);
  mul_add_c(a[5], b[3], c3, c1, c2);
  mul_add_c(a[6], b[2], c3, c1, c2);
  mul_add_c(a[7], b[1], c3, c1, c2);
  r[8] = c3;
  c3 = 0;

  /* Column 9. */
  mul_add_c(a[2], b[7], c1, c2, c3);
  mul_add_c(a[3], b[6], c1, c2, c3);
  mul_add_c(a[4], b[5], c1, c2, c3);
  mul_add_c(a[5], b[4], c1, c2, c3);
  mul_add_c(a[6], b[3], c1, c2, c3);
  mul_add_c(a[7], b[2], c1, c2, c3);
  r[9] = c1;
  c1 = 0;

  /* Column 10. */
  mul_add_c(a[3], b[7], c2, c3, c1);
  mul_add_c(a[4], b[6], c2, c3, c1);
  mul_add_c(a[5], b[5], c2, c3, c1);
  mul_add_c(a[6], b[4], c2, c3, c1);
  mul_add_c(a[7], b[3], c2, c3, c1);
  r[10] = c2;
  c2 = 0;

  /* Column 11. */
  mul_add_c(a[4], b[7], c3, c1, c2);
  mul_add_c(a[5], b[6], c3, c1, c2);
  mul_add_c(a[6], b[5], c3, c1, c2);
  mul_add_c(a[7], b[4], c3, c1, c2);
  r[11] = c3;
  c3 = 0;

  /* Column 12. */
  mul_add_c(a[5], b[7], c1, c2, c3);
  mul_add_c(a[6], b[6], c1, c2, c3);
  mul_add_c(a[7], b[5], c1, c2, c3);
  r[12] = c1;
  c1 = 0;

  /* Column 13. */
  mul_add_c(a[6], b[7], c2, c3, c1);
  mul_add_c(a[7], b[6], c2, c3, c1);
  r[13] = c2;
  c2 = 0;

  /* Column 14; the middle accumulator word is the final output word. */
  mul_add_c(a[7], b[7], c3, c1, c2);
  r[14] = c3;
  r[15] = c1;
}
562 
/* bn_mul_comba4 multiplies the four-word arrays |a| and |b| and stores the
 * eight result words in r[0..7].
 *
 * The product is built one output word ("column") at a time: for column k,
 * every a[i] * b[k - i] is added into a three-word accumulator, the lowest
 * accumulator word is stored as r[k], and the remaining two words carry over
 * into the next column. The roles of c1, c2 and c3 rotate from column to
 * column; the variable that was just stored is cleared and becomes the top
 * word of the next column. */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
  BN_ULONG c1 = 0, c2 = 0, c3 = 0;

  /* Column 0. */
  mul_add_c(a[0], b[0], c1, c2, c3);
  r[0] = c1;
  c1 = 0;

  /* Column 1. */
  mul_add_c(a[0], b[1], c2, c3, c1);
  mul_add_c(a[1], b[0], c2, c3, c1);
  r[1] = c2;
  c2 = 0;

  /* Column 2. */
  mul_add_c(a[0], b[2], c3, c1, c2);
  mul_add_c(a[1], b[1], c3, c1, c2);
  mul_add_c(a[2], b[0], c3, c1, c2);
  r[2] = c3;
  c3 = 0;

  /* Column 3. */
  mul_add_c(a[0], b[3], c1, c2, c3);
  mul_add_c(a[1], b[2], c1, c2, c3);
  mul_add_c(a[2], b[1], c1, c2, c3);
  mul_add_c(a[3], b[0], c1, c2, c3);
  r[3] = c1;
  c1 = 0;

  /* Column 4. */
  mul_add_c(a[1], b[3], c2, c3, c1);
  mul_add_c(a[2], b[2], c2, c3, c1);
  mul_add_c(a[3], b[1], c2, c3, c1);
  r[4] = c2;
  c2 = 0;

  /* Column 5. */
  mul_add_c(a[2], b[3], c3, c1, c2);
  mul_add_c(a[3], b[2], c3, c1, c2);
  r[5] = c3;
  c3 = 0;

  /* Column 6; the middle accumulator word is the final output word. */
  mul_add_c(a[3], b[3], c1, c2, c3);
  r[6] = c1;
  r[7] = c2;
}
600 
/* bn_sqr_comba8 squares the eight-word array |a| and stores the sixteen
 * result words in r[0..15].
 *
 * The square is built one output word ("column") at a time. For column k,
 * each pair i > j with i + j == k contributes 2 * a[i] * a[j] through
 * sqr_add_c2, and even columns additionally receive a[k / 2]^2 through
 * sqr_add_c. The lowest word of the three-word accumulator is stored as r[k]
 * and the other two carry into the next column. The roles of c1, c2 and c3
 * rotate from column to column; the variable that was just stored is cleared
 * and becomes the top word of the next column. */
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) {
  BN_ULONG c1 = 0, c2 = 0, c3 = 0;

  /* Column 0. */
  sqr_add_c(a, 0, c1, c2, c3);
  r[0] = c1;
  c1 = 0;

  /* Column 1. */
  sqr_add_c2(a, 1, 0, c2, c3, c1);
  r[1] = c2;
  c2 = 0;

  /* Column 2. */
  sqr_add_c2(a, 2, 0, c3, c1, c2);
  sqr_add_c(a, 1, c3, c1, c2);
  r[2] = c3;
  c3 = 0;

  /* Column 3. */
  sqr_add_c2(a, 3, 0, c1, c2, c3);
  sqr_add_c2(a, 2, 1, c1, c2, c3);
  r[3] = c1;
  c1 = 0;

  /* Column 4. */
  sqr_add_c2(a, 4, 0, c2, c3, c1);
  sqr_add_c2(a, 3, 1, c2, c3, c1);
  sqr_add_c(a, 2, c2, c3, c1);
  r[4] = c2;
  c2 = 0;

  /* Column 5. */
  sqr_add_c2(a, 5, 0, c3, c1, c2);
  sqr_add_c2(a, 4, 1, c3, c1, c2);
  sqr_add_c2(a, 3, 2, c3, c1, c2);
  r[5] = c3;
  c3 = 0;

  /* Column 6. */
  sqr_add_c2(a, 6, 0, c1, c2, c3);
  sqr_add_c2(a, 5, 1, c1, c2, c3);
  sqr_add_c2(a, 4, 2, c1, c2, c3);
  sqr_add_c(a, 3, c1, c2, c3);
  r[6] = c1;
  c1 = 0;

  /* Column 7. */
  sqr_add_c2(a, 7, 0, c2, c3, c1);
  sqr_add_c2(a, 6, 1, c2, c3, c1);
  sqr_add_c2(a, 5, 2, c2, c3, c1);
  sqr_add_c2(a, 4, 3, c2, c3, c1);
  r[7] = c2;
  c2 = 0;

  /* Column 8. */
  sqr_add_c2(a, 7, 1, c3, c1, c2);
  sqr_add_c2(a, 6, 2, c3, c1, c2);
  sqr_add_c2(a, 5, 3, c3, c1, c2);
  sqr_add_c(a, 4, c3, c1, c2);
  r[8] = c3;
  c3 = 0;

  /* Column 9. */
  sqr_add_c2(a, 7, 2, c1, c2, c3);
  sqr_add_c2(a, 6, 3, c1, c2, c3);
  sqr_add_c2(a, 5, 4, c1, c2, c3);
  r[9] = c1;
  c1 = 0;

  /* Column 10. */
  sqr_add_c2(a, 7, 3, c2, c3, c1);
  sqr_add_c2(a, 6, 4, c2, c3, c1);
  sqr_add_c(a, 5, c2, c3, c1);
  r[10] = c2;
  c2 = 0;

  /* Column 11. */
  sqr_add_c2(a, 7, 4, c3, c1, c2);
  sqr_add_c2(a, 6, 5, c3, c1, c2);
  r[11] = c3;
  c3 = 0;

  /* Column 12. */
  sqr_add_c2(a, 7, 5, c1, c2, c3);
  sqr_add_c(a, 6, c1, c2, c3);
  r[12] = c1;
  c1 = 0;

  /* Column 13. */
  sqr_add_c2(a, 7, 6, c2, c3, c1);
  r[13] = c2;
  c2 = 0;

  /* Column 14; the middle accumulator word is the final output word. */
  sqr_add_c(a, 7, c3, c1, c2);
  r[14] = c3;
  r[15] = c1;
}
674 
/* bn_sqr_comba4 squares the four-word array |a| and stores the eight result
 * words in r[0..7].
 *
 * The square is built one output word ("column") at a time. For column k,
 * each pair i > j with i + j == k contributes 2 * a[i] * a[j] through
 * sqr_add_c2, and even columns additionally receive a[k / 2]^2 through
 * sqr_add_c. The lowest word of the three-word accumulator is stored as r[k]
 * and the other two carry into the next column. The roles of c1, c2 and c3
 * rotate from column to column; the variable that was just stored is cleared
 * and becomes the top word of the next column. */
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
  BN_ULONG c1 = 0, c2 = 0, c3 = 0;

  /* Column 0. */
  sqr_add_c(a, 0, c1, c2, c3);
  r[0] = c1;
  c1 = 0;

  /* Column 1. */
  sqr_add_c2(a, 1, 0, c2, c3, c1);
  r[1] = c2;
  c2 = 0;

  /* Column 2. */
  sqr_add_c2(a, 2, 0, c3, c1, c2);
  sqr_add_c(a, 1, c3, c1, c2);
  r[2] = c3;
  c3 = 0;

  /* Column 3. */
  sqr_add_c2(a, 3, 0, c1, c2, c3);
  sqr_add_c2(a, 2, 1, c1, c2, c3);
  r[3] = c1;
  c1 = 0;

  /* Column 4. */
  sqr_add_c2(a, 3, 1, c2, c3, c1);
  sqr_add_c(a, 2, c2, c3, c1);
  r[4] = c2;
  c2 = 0;

  /* Column 5. */
  sqr_add_c2(a, 3, 2, c3, c1, c2);
  r[5] = c3;
  c3 = 0;

  /* Column 6; the middle accumulator word is the final output word. */
  sqr_add_c(a, 3, c1, c2, c3);
  r[6] = c1;
  r[7] = c2;
}
706 
707 #undef mul_add
708 #undef mul
709 #undef sqr
710 #undef mul_add_c
711 #undef mul_add_c2
712 #undef sqr_add_c
713 #undef sqr_add_c2
714 
715 #endif
716