• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2  * All rights reserved.
3  *
4  * This package is an SSL implementation written
5  * by Eric Young (eay@cryptsoft.com).
6  * The implementation was written so as to conform with Netscapes SSL.
7  *
8  * This library is free for commercial and non-commercial use as long as
9  * the following conditions are aheared to.  The following conditions
10  * apply to all code found in this distribution, be it the RC4, RSA,
11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12  * included with this distribution is covered by the same copyright terms
13  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14  *
15  * Copyright remains Eric Young's, and as such any Copyright notices in
16  * the code are not to be removed.
17  * If this package is used in a product, Eric Young should be given attribution
18  * as the author of the parts of the library used.
19  * This can be in the form of a textual message at program startup or
20  * in documentation (online or textual) provided with the package.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  * 3. All advertising materials mentioning features or use of this software
31  *    must display the following acknowledgement:
32  *    "This product includes cryptographic software written by
33  *     Eric Young (eay@cryptsoft.com)"
34  *    The word 'cryptographic' can be left out if the rouines from the library
35  *    being used are not cryptographic related :-).
36  * 4. If you include any Windows specific code (or a derivative thereof) from
37  *    the apps directory (application code) you must include an acknowledgement:
38  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39  *
40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  * The licence and distribution terms for any publically available version or
53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
54  * copied and put under another distribution licence
55  * [including the GNU Public Licence.] */
56 
57 #include <openssl/bn.h>
58 
59 #include <assert.h>
60 
61 #include "internal.h"
62 
63 
// Decide which word primitives come from platform assembly. BN_MUL_ASM and
// BN_ADD_ASM are tested further down to skip the portable C versions.
#if !defined(OPENSSL_NO_ASM)

// OPENSSL_X86: see asm/bn-586.pl.
#if defined(OPENSSL_X86)
#define BN_ADD_ASM
#define BN_MUL_ASM
#endif

// OPENSSL_X86_64 with GCC or Clang: see asm/x86_64-gcc.c
#if defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))
#define BN_ADD_ASM
#define BN_MUL_ASM
#endif

// OPENSSL_AARCH64: see asm/bn-armv8.pl.
#if defined(OPENSSL_AARCH64)
#define BN_ADD_ASM
#endif

#endif  // !OPENSSL_NO_ASM
81 
82 #if !defined(BN_MUL_ASM)
83 
84 #ifdef BN_ULLONG
// mul_add evaluates |w| * |a| + |r| + |c| in BN_ULLONG, then stores Lw() of
// the result in |r| and Hw() of the result in |c|. |r| and |c| must be
// lvalues; both are read before being overwritten. (Lw/Hw are defined outside
// this file; presumably the low and high words of a BN_ULLONG.)
#define mul_add(r, a, w, c)                            \
  do {                                                 \
    BN_ULLONG acc_ = (BN_ULLONG)(w) * (a) + (r) + (c); \
    (r) = Lw(acc_);                                    \
    (c) = Hw(acc_);                                    \
  } while (0)
92 
// mul evaluates |w| * |a| + |c| in BN_ULLONG, then stores Lw() of the result
// in |r| and Hw() of the result in |c|. |r| is write-only; |c| is read and
// then overwritten.
#define mul(r, a, w, c)                          \
  do {                                           \
    BN_ULLONG acc_ = (BN_ULLONG)(w) * (a) + (c); \
    (r) = Lw(acc_);                              \
    (c) = Hw(acc_);                              \
  } while (0)
100 
// sqr evaluates |a| * |a| in BN_ULLONG and stores Lw() of the result in |r0|
// and Hw() of the result in |r1|.
#define sqr(r0, r1, a)                   \
  do {                                   \
    BN_ULLONG sq_ = (BN_ULLONG)(a) * (a); \
    (r0) = Lw(sq_);                      \
    (r1) = Hw(sq_);                      \
  } while (0)
108 
109 #else
110 
// mul_add computes |w| * |a| + |r| + |c| using BN_UMULT_LOHI for the
// double-word product. The low word of the total is stored in |r| and the
// high word in |c|. |r| and |c| must be lvalues; both are read before being
// overwritten.
#define mul_add(r, a, w, c)                            \
  do {                                                 \
    BN_ULONG word_ = (a);                              \
    BN_ULONG prod_lo_, prod_hi_;                       \
    BN_UMULT_LOHI(prod_lo_, prod_hi_, w, word_);       \
    /* Fold in the incoming carry, noting wrap-around. */ \
    BN_ULONG sum_ = (r) + (c);                         \
    (c) = prod_hi_ + (sum_ < (c));                     \
    /* Fold in the low product word, again noting wrap-around. */ \
    sum_ += prod_lo_;                                  \
    (c) += (sum_ < prod_lo_);                          \
    (r) = sum_;                                        \
  } while (0)
123 
// mul computes |w| * |a| + |c| using BN_UMULT_LOHI for the double-word
// product. The low word of the total is stored in |r| and the high word in
// |c|. |c| is read and then overwritten.
#define mul(r, a, w, c)                          \
  do {                                           \
    BN_ULONG word_ = (a);                        \
    BN_ULONG prod_lo_, prod_hi_;                 \
    BN_UMULT_LOHI(prod_lo_, prod_hi_, w, word_); \
    BN_ULONG sum_ = prod_lo_ + (c);              \
    /* A wrapped low-word sum carries one into the high word. */ \
    (c) = prod_hi_ + (sum_ < prod_lo_);          \
    (r) = sum_;                                  \
  } while (0)
133 
// sqr computes |a| * |a| with BN_UMULT_LOHI, storing the low word in |r0| and
// the high word in |r1|. |a| is evaluated once.
#define sqr(r0, r1, a)                           \
  do {                                           \
    BN_ULONG word_ = (a);                        \
    BN_ULONG sq_lo_, sq_hi_;                     \
    BN_UMULT_LOHI(sq_lo_, sq_hi_, word_, word_); \
    (r0) = sq_lo_;                               \
    (r1) = sq_hi_;                               \
  } while (0)
139 
140 #endif  // !BN_ULLONG
141 
// bn_mul_add_words adds |ap| * |w| into |rp| word by word: for each of the
// |num| words, rp[i] receives the low word of ap[i] * w + rp[i] + carry and
// the high word becomes the carry into the next position. It returns the
// carry out of the last word (zero when |num| is zero).
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
                          BN_ULONG w) {
  BN_ULONG carry = 0;

  // Main loop, unrolled four words at a time.
  for (; num >= 4; num -= 4, ap += 4, rp += 4) {
    mul_add(rp[0], ap[0], w, carry);
    mul_add(rp[1], ap[1], w, carry);
    mul_add(rp[2], ap[2], w, carry);
    mul_add(rp[3], ap[3], w, carry);
  }

  // Up to three leftover words.
  for (; num != 0; num--, ap++, rp++) {
    mul_add(rp[0], ap[0], w, carry);
  }

  return carry;
}
169 
// bn_mul_words multiplies the |num|-word array |ap| by the single word |w|:
// rp[i] receives the low word of ap[i] * w + carry and the high word becomes
// the carry into the next position. It returns the carry out of the last
// word (zero when |num| is zero).
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
                      BN_ULONG w) {
  BN_ULONG carry = 0;

  // Main loop, unrolled four words at a time.
  for (; num >= 4; num -= 4, ap += 4, rp += 4) {
    mul(rp[0], ap[0], w, carry);
    mul(rp[1], ap[1], w, carry);
    mul(rp[2], ap[2], w, carry);
    mul(rp[3], ap[3], w, carry);
  }

  // Up to three leftover words.
  for (; num != 0; num--, ap++, rp++) {
    mul(rp[0], ap[0], w, carry);
  }

  return carry;
}
195 
// bn_sqr_words squares each of the |n| words of |a| independently. The
// double-word square of a[i] is written to r[2*i] (low word) and r[2*i + 1]
// (high word), so |r| is written for 2 * |n| words.
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
  // Main loop, unrolled four input words (eight output words) at a time.
  for (; n >= 4; n -= 4, a += 4, r += 8) {
    sqr(r[0], r[1], a[0]);
    sqr(r[2], r[3], a[1]);
    sqr(r[4], r[5], a[2]);
    sqr(r[6], r[7], a[3]);
  }

  // Up to three leftover input words.
  for (; n != 0; n--, a++, r += 2) {
    sqr(r[0], r[1], a[0]);
  }
}
217 
// mul_add_c(a,b,c0,c1,c2)    -- c+=a*b for three word number c=(c2,c1,c0)
// mul_add_c2(a,b,c0,c1,c2)   -- c+=2*a*b for three word number c=(c2,c1,c0)
// sqr_add_c(a,i,c0,c1,c2)    -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
//                               c=(c2,c1,c0)
222 
223 #ifdef BN_ULLONG
224 
// mul_add_c adds |a| * |b| to the three-word accumulator (c2, c1, c0).
//
// Adding |c0| to the double-word product cannot overflow, because the high
// half of a product is never all-ones.
#define mul_add_c(a, b, c0, c1, c2)                \
  do {                                             \
    BN_ULLONG wide_ = (BN_ULLONG)(a) * (b) + (c0); \
    BN_ULONG upper_ = (BN_ULONG)Hw(wide_);         \
    (c0) = (BN_ULONG)Lw(wide_);                    \
    (c1) += upper_;                                \
    /* A wrapped |c1| carries into |c2|. */        \
    (c2) += ((c1) < upper_) ? 1 : 0;               \
  } while (0)
237 
// mul_add_c2 adds 2 * |a| * |b| to the three-word accumulator (c2, c1, c0).
// The product is computed once and accumulated twice; neither addition of
// |c0| to the product can overflow the double word.
#define mul_add_c2(a, b, c0, c1, c2)              \
  do {                                            \
    const BN_ULLONG prod_ = (BN_ULLONG)(a) * (b); \
    BN_ULLONG wide_;                              \
    BN_ULONG upper_;                              \
    /* First copy of the product. */              \
    wide_ = prod_ + (c0);                         \
    (c0) = (BN_ULONG)Lw(wide_);                   \
    upper_ = (BN_ULONG)Hw(wide_);                 \
    (c1) += upper_;                               \
    (c2) += ((c1) < upper_) ? 1 : 0;              \
    /* Second copy, on top of the updated |c0|. */ \
    wide_ = prod_ + (c0);                         \
    (c0) = (BN_ULONG)Lw(wide_);                   \
    upper_ = (BN_ULONG)Hw(wide_);                 \
    (c1) += upper_;                               \
    (c2) += ((c1) < upper_) ? 1 : 0;              \
  } while (0)
253 
// sqr_add_c adds a[i]^2 to the three-word accumulator (c2, c1, c0). Adding
// |c0| to the double-word square cannot overflow.
#define sqr_add_c(a, i, c0, c1, c2)                      \
  do {                                                   \
    BN_ULLONG wide_ = (BN_ULLONG)(a)[i] * (a)[i] + (c0); \
    BN_ULONG upper_ = (BN_ULONG)Hw(wide_);               \
    (c0) = (BN_ULONG)Lw(wide_);                          \
    (c1) += upper_;                                      \
    /* A wrapped |c1| carries into |c2|. */              \
    (c2) += ((c1) < upper_) ? 1 : 0;                     \
  } while (0)
264 
// sqr_add_c2 adds 2 * a[i] * a[j] to the three-word accumulator (c2, c1, c0)
// by handing the two array elements to mul_add_c2.
#define sqr_add_c2(a, i, j, c0, c1, c2) \
  mul_add_c2((a)[i], (a)[j], c0, c1, c2)
266 
267 #else
268 
// mul_add_c adds |a| * |b| to the three-word accumulator (c2, c1, c0).
//
// Incrementing the high product word cannot overflow, because the high word
// of a multiplication result is never all-ones.
#define mul_add_c(a, b, c0, c1, c2)                \
  do {                                             \
    BN_ULONG lhs_ = (a);                           \
    BN_ULONG rhs_ = (b);                           \
    BN_ULONG prod_lo_, prod_hi_;                   \
    BN_UMULT_LOHI(prod_lo_, prod_hi_, lhs_, rhs_); \
    (c0) += prod_lo_;                              \
    /* Carry out of |c0| goes into the high product word. */ \
    prod_hi_ += ((c0) < prod_lo_);                 \
    (c1) += prod_hi_;                              \
    /* Carry out of |c1| goes into |c2|. */        \
    (c2) += ((c1) < prod_hi_);                     \
  } while (0)
281 
// mul_add_c2 adds 2 * |a| * |b| to the three-word accumulator (c2, c1, c0).
// The product is computed once with BN_UMULT_LOHI and accumulated twice.
// Incrementing the high product word cannot overflow, because the high word
// of a multiplication result is never all-ones. Every use of a macro
// parameter is parenthesized.
#define mul_add_c2(a, b, c0, c1, c2) \
  do {                               \
    BN_ULONG ta = (a), tb = (b);     \
    BN_ULONG lo, hi, tt;             \
    BN_UMULT_LOHI(lo, hi, ta, tb);   \
    /* First copy of the product; |hi| is kept intact for the second. */ \
    (c0) += lo;                      \
    tt = hi + (((c0) < lo) ? 1 : 0); \
    (c1) += tt;                      \
    (c2) += ((c1) < tt) ? 1 : 0;     \
    /* Second copy of the product. */ \
    (c0) += lo;                      \
    hi += ((c0) < lo) ? 1 : 0;       \
    (c1) += hi;                      \
    (c2) += ((c1) < hi) ? 1 : 0;     \
  } while (0)
296 
// sqr_add_c adds a[i]^2 to the three-word accumulator (c2, c1, c0), using
// BN_UMULT_LOHI for the double-word square. Incrementing the high word of the
// square cannot overflow, because the high word of a multiplication result
// is never all-ones. Every use of a macro parameter is parenthesized.
#define sqr_add_c(a, i, c0, c1, c2) \
  do {                              \
    BN_ULONG ta = (a)[i];           \
    BN_ULONG lo, hi;                \
    BN_UMULT_LOHI(lo, hi, ta, ta);  \
    (c0) += lo;                     \
    /* Carry out of |c0| goes into the high word of the square. */ \
    hi += ((c0) < lo) ? 1 : 0;      \
    (c1) += hi;                     \
    /* Carry out of |c1| goes into |c2|. */ \
    (c2) += ((c1) < hi) ? 1 : 0;    \
  } while (0)
307 
// sqr_add_c2 adds 2 * a[i] * a[j] to the three-word accumulator (c2, c1, c0)
// by handing the two array elements to mul_add_c2.
#define sqr_add_c2(a, i, j, c0, c1, c2) \
  mul_add_c2((a)[i], (a)[j], c0, c1, c2)
309 
310 #endif  // !BN_ULLONG
311 
// bn_mul_comba8 writes the 16-word product of the 8-word inputs |a| and |b|
// to |r|, one output column at a time. Column k accumulates every a[i] * b[j]
// with i + j == k into a three-word accumulator. Its lowest word is then
// stored to r[k] and cleared, and the three variables rotate roles so the
// cleared one becomes the top word of the next column.
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
  BN_ULONG s0 = 0, s1 = 0, s2 = 0;

  // Column 0.
  mul_add_c(a[0], b[0], s0, s1, s2);
  r[0] = s0;
  s0 = 0;

  // Column 1.
  mul_add_c(a[0], b[1], s1, s2, s0);
  mul_add_c(a[1], b[0], s1, s2, s0);
  r[1] = s1;
  s1 = 0;

  // Column 2.
  mul_add_c(a[2], b[0], s2, s0, s1);
  mul_add_c(a[1], b[1], s2, s0, s1);
  mul_add_c(a[0], b[2], s2, s0, s1);
  r[2] = s2;
  s2 = 0;

  // Column 3.
  mul_add_c(a[0], b[3], s0, s1, s2);
  mul_add_c(a[1], b[2], s0, s1, s2);
  mul_add_c(a[2], b[1], s0, s1, s2);
  mul_add_c(a[3], b[0], s0, s1, s2);
  r[3] = s0;
  s0 = 0;

  // Column 4.
  mul_add_c(a[4], b[0], s1, s2, s0);
  mul_add_c(a[3], b[1], s1, s2, s0);
  mul_add_c(a[2], b[2], s1, s2, s0);
  mul_add_c(a[1], b[3], s1, s2, s0);
  mul_add_c(a[0], b[4], s1, s2, s0);
  r[4] = s1;
  s1 = 0;

  // Column 5.
  mul_add_c(a[0], b[5], s2, s0, s1);
  mul_add_c(a[1], b[4], s2, s0, s1);
  mul_add_c(a[2], b[3], s2, s0, s1);
  mul_add_c(a[3], b[2], s2, s0, s1);
  mul_add_c(a[4], b[1], s2, s0, s1);
  mul_add_c(a[5], b[0], s2, s0, s1);
  r[5] = s2;
  s2 = 0;

  // Column 6.
  mul_add_c(a[6], b[0], s0, s1, s2);
  mul_add_c(a[5], b[1], s0, s1, s2);
  mul_add_c(a[4], b[2], s0, s1, s2);
  mul_add_c(a[3], b[3], s0, s1, s2);
  mul_add_c(a[2], b[4], s0, s1, s2);
  mul_add_c(a[1], b[5], s0, s1, s2);
  mul_add_c(a[0], b[6], s0, s1, s2);
  r[6] = s0;
  s0 = 0;

  // Column 7 (the widest: all eight cross products).
  mul_add_c(a[0], b[7], s1, s2, s0);
  mul_add_c(a[1], b[6], s1, s2, s0);
  mul_add_c(a[2], b[5], s1, s2, s0);
  mul_add_c(a[3], b[4], s1, s2, s0);
  mul_add_c(a[4], b[3], s1, s2, s0);
  mul_add_c(a[5], b[2], s1, s2, s0);
  mul_add_c(a[6], b[1], s1, s2, s0);
  mul_add_c(a[7], b[0], s1, s2, s0);
  r[7] = s1;
  s1 = 0;

  // Column 8.
  mul_add_c(a[7], b[1], s2, s0, s1);
  mul_add_c(a[6], b[2], s2, s0, s1);
  mul_add_c(a[5], b[3], s2, s0, s1);
  mul_add_c(a[4], b[4], s2, s0, s1);
  mul_add_c(a[3], b[5], s2, s0, s1);
  mul_add_c(a[2], b[6], s2, s0, s1);
  mul_add_c(a[1], b[7], s2, s0, s1);
  r[8] = s2;
  s2 = 0;

  // Column 9.
  mul_add_c(a[2], b[7], s0, s1, s2);
  mul_add_c(a[3], b[6], s0, s1, s2);
  mul_add_c(a[4], b[5], s0, s1, s2);
  mul_add_c(a[5], b[4], s0, s1, s2);
  mul_add_c(a[6], b[3], s0, s1, s2);
  mul_add_c(a[7], b[2], s0, s1, s2);
  r[9] = s0;
  s0 = 0;

  // Column 10.
  mul_add_c(a[7], b[3], s1, s2, s0);
  mul_add_c(a[6], b[4], s1, s2, s0);
  mul_add_c(a[5], b[5], s1, s2, s0);
  mul_add_c(a[4], b[6], s1, s2, s0);
  mul_add_c(a[3], b[7], s1, s2, s0);
  r[10] = s1;
  s1 = 0;

  // Column 11.
  mul_add_c(a[4], b[7], s2, s0, s1);
  mul_add_c(a[5], b[6], s2, s0, s1);
  mul_add_c(a[6], b[5], s2, s0, s1);
  mul_add_c(a[7], b[4], s2, s0, s1);
  r[11] = s2;
  s2 = 0;

  // Column 12.
  mul_add_c(a[7], b[5], s0, s1, s2);
  mul_add_c(a[6], b[6], s0, s1, s2);
  mul_add_c(a[5], b[7], s0, s1, s2);
  r[12] = s0;
  s0 = 0;

  // Column 13.
  mul_add_c(a[6], b[7], s1, s2, s0);
  mul_add_c(a[7], b[6], s1, s2, s0);
  r[13] = s1;
  s1 = 0;

  // Column 14; its middle accumulator word is the final output word.
  mul_add_c(a[7], b[7], s2, s0, s1);
  r[14] = s2;
  r[15] = s0;
}
413 
// bn_mul_comba4 writes the 8-word product of the 4-word inputs |a| and |b| to
// |r|, one output column at a time. Column k accumulates every a[i] * b[j]
// with i + j == k into a three-word accumulator. Its lowest word is then
// stored to r[k] and cleared, and the three variables rotate roles.
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
  BN_ULONG s0 = 0, s1 = 0, s2 = 0;

  // Column 0.
  mul_add_c(a[0], b[0], s0, s1, s2);
  r[0] = s0;
  s0 = 0;

  // Column 1.
  mul_add_c(a[0], b[1], s1, s2, s0);
  mul_add_c(a[1], b[0], s1, s2, s0);
  r[1] = s1;
  s1 = 0;

  // Column 2.
  mul_add_c(a[2], b[0], s2, s0, s1);
  mul_add_c(a[1], b[1], s2, s0, s1);
  mul_add_c(a[0], b[2], s2, s0, s1);
  r[2] = s2;
  s2 = 0;

  // Column 3 (the widest: all four cross products).
  mul_add_c(a[0], b[3], s0, s1, s2);
  mul_add_c(a[1], b[2], s0, s1, s2);
  mul_add_c(a[2], b[1], s0, s1, s2);
  mul_add_c(a[3], b[0], s0, s1, s2);
  r[3] = s0;
  s0 = 0;

  // Column 4.
  mul_add_c(a[3], b[1], s1, s2, s0);
  mul_add_c(a[2], b[2], s1, s2, s0);
  mul_add_c(a[1], b[3], s1, s2, s0);
  r[4] = s1;
  s1 = 0;

  // Column 5.
  mul_add_c(a[2], b[3], s2, s0, s1);
  mul_add_c(a[3], b[2], s2, s0, s1);
  r[5] = s2;
  s2 = 0;

  // Column 6; its middle accumulator word is the final output word.
  mul_add_c(a[3], b[3], s0, s1, s2);
  r[6] = s0;
  r[7] = s1;
}
451 
// bn_sqr_comba8 writes the 16-word square of the 8-word input |a| to |r|, one
// output column at a time. Column k accumulates a[k/2]^2 (even k only, via
// sqr_add_c) plus 2 * a[i] * a[j] for each pair i > j with i + j == k (via
// sqr_add_c2) into a three-word accumulator. Its lowest word is then stored
// to r[k] and cleared, and the three variables rotate roles.
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
  BN_ULONG s0 = 0, s1 = 0, s2 = 0;

  // Column 0.
  sqr_add_c(a, 0, s0, s1, s2);
  r[0] = s0;
  s0 = 0;

  // Column 1.
  sqr_add_c2(a, 1, 0, s1, s2, s0);
  r[1] = s1;
  s1 = 0;

  // Column 2.
  sqr_add_c(a, 1, s2, s0, s1);
  sqr_add_c2(a, 2, 0, s2, s0, s1);
  r[2] = s2;
  s2 = 0;

  // Column 3.
  sqr_add_c2(a, 3, 0, s0, s1, s2);
  sqr_add_c2(a, 2, 1, s0, s1, s2);
  r[3] = s0;
  s0 = 0;

  // Column 4.
  sqr_add_c(a, 2, s1, s2, s0);
  sqr_add_c2(a, 3, 1, s1, s2, s0);
  sqr_add_c2(a, 4, 0, s1, s2, s0);
  r[4] = s1;
  s1 = 0;

  // Column 5.
  sqr_add_c2(a, 5, 0, s2, s0, s1);
  sqr_add_c2(a, 4, 1, s2, s0, s1);
  sqr_add_c2(a, 3, 2, s2, s0, s1);
  r[5] = s2;
  s2 = 0;

  // Column 6.
  sqr_add_c(a, 3, s0, s1, s2);
  sqr_add_c2(a, 4, 2, s0, s1, s2);
  sqr_add_c2(a, 5, 1, s0, s1, s2);
  sqr_add_c2(a, 6, 0, s0, s1, s2);
  r[6] = s0;
  s0 = 0;

  // Column 7.
  sqr_add_c2(a, 7, 0, s1, s2, s0);
  sqr_add_c2(a, 6, 1, s1, s2, s0);
  sqr_add_c2(a, 5, 2, s1, s2, s0);
  sqr_add_c2(a, 4, 3, s1, s2, s0);
  r[7] = s1;
  s1 = 0;

  // Column 8.
  sqr_add_c(a, 4, s2, s0, s1);
  sqr_add_c2(a, 5, 3, s2, s0, s1);
  sqr_add_c2(a, 6, 2, s2, s0, s1);
  sqr_add_c2(a, 7, 1, s2, s0, s1);
  r[8] = s2;
  s2 = 0;

  // Column 9.
  sqr_add_c2(a, 7, 2, s0, s1, s2);
  sqr_add_c2(a, 6, 3, s0, s1, s2);
  sqr_add_c2(a, 5, 4, s0, s1, s2);
  r[9] = s0;
  s0 = 0;

  // Column 10.
  sqr_add_c(a, 5, s1, s2, s0);
  sqr_add_c2(a, 6, 4, s1, s2, s0);
  sqr_add_c2(a, 7, 3, s1, s2, s0);
  r[10] = s1;
  s1 = 0;

  // Column 11.
  sqr_add_c2(a, 7, 4, s2, s0, s1);
  sqr_add_c2(a, 6, 5, s2, s0, s1);
  r[11] = s2;
  s2 = 0;

  // Column 12.
  sqr_add_c(a, 6, s0, s1, s2);
  sqr_add_c2(a, 7, 5, s0, s1, s2);
  r[12] = s0;
  s0 = 0;

  // Column 13.
  sqr_add_c2(a, 7, 6, s1, s2, s0);
  r[13] = s1;
  s1 = 0;

  // Column 14; its middle accumulator word is the final output word.
  sqr_add_c(a, 7, s2, s0, s1);
  r[14] = s2;
  r[15] = s0;
}
525 
// bn_sqr_comba4 writes the 8-word square of the 4-word input |a| to |r|, one
// output column at a time. Column k accumulates a[k/2]^2 (even k only, via
// sqr_add_c) plus 2 * a[i] * a[j] for each pair i > j with i + j == k (via
// sqr_add_c2) into a three-word accumulator. Its lowest word is then stored
// to r[k] and cleared, and the three variables rotate roles.
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
  BN_ULONG s0 = 0, s1 = 0, s2 = 0;

  // Column 0.
  sqr_add_c(a, 0, s0, s1, s2);
  r[0] = s0;
  s0 = 0;

  // Column 1.
  sqr_add_c2(a, 1, 0, s1, s2, s0);
  r[1] = s1;
  s1 = 0;

  // Column 2.
  sqr_add_c(a, 1, s2, s0, s1);
  sqr_add_c2(a, 2, 0, s2, s0, s1);
  r[2] = s2;
  s2 = 0;

  // Column 3.
  sqr_add_c2(a, 3, 0, s0, s1, s2);
  sqr_add_c2(a, 2, 1, s0, s1, s2);
  r[3] = s0;
  s0 = 0;

  // Column 4.
  sqr_add_c(a, 2, s1, s2, s0);
  sqr_add_c2(a, 3, 1, s1, s2, s0);
  r[4] = s1;
  s1 = 0;

  // Column 5.
  sqr_add_c2(a, 3, 2, s2, s0, s1);
  r[5] = s2;
  s2 = 0;

  // Column 6; its middle accumulator word is the final output word.
  sqr_add_c(a, 3, s0, s1, s2);
  r[6] = s0;
  r[7] = s1;
}
557 
558 #undef mul_add
559 #undef mul
560 #undef sqr
561 #undef mul_add_c
562 #undef mul_add_c2
563 #undef sqr_add_c
564 #undef sqr_add_c2
565 
566 #endif  // !BN_MUL_ASM
567 
568 #if !defined(BN_ADD_ASM)
569 
570 // bn_add_with_carry returns |x + y + carry|, and sets |*out_carry| to the
571 // carry bit. |carry| must be zero or one.
bn_add_with_carry(BN_ULONG x,BN_ULONG y,BN_ULONG carry,BN_ULONG * out_carry)572 static inline BN_ULONG bn_add_with_carry(BN_ULONG x, BN_ULONG y, BN_ULONG carry,
573                                          BN_ULONG *out_carry) {
574   assert(carry == 0 || carry == 1);
575 #if defined(BN_ULLONG)
576   BN_ULLONG ret = carry;
577   ret += (BN_ULLONG)x + y;
578   *out_carry = (BN_ULONG)(ret >> BN_BITS2);
579   return (BN_ULONG)ret;
580 #else
581   x += carry;
582   carry = x < carry;
583   BN_ULONG ret = x + y;
584   carry += ret < x;
585   *out_carry = carry;
586   return ret;
587 #endif
588 }
589 
590 // bn_sub_with_borrow returns |x - y - borrow|, and sets |*out_borrow| to the
591 // borrow bit. |borrow| must be zero or one.
bn_sub_with_borrow(BN_ULONG x,BN_ULONG y,BN_ULONG borrow,BN_ULONG * out_borrow)592 static inline BN_ULONG bn_sub_with_borrow(BN_ULONG x, BN_ULONG y,
593                                           BN_ULONG borrow,
594                                           BN_ULONG *out_borrow) {
595   assert(borrow == 0 || borrow == 1);
596   BN_ULONG ret = x - y - borrow;
597   *out_borrow = (x < y) | ((x == y) & borrow);
598   return ret;
599 }
600 
// bn_add_words sets r[i] to a[i] + b[i] plus the carry from the previous word,
// for each of the |n| words, and returns the carry out of the last word (zero
// when |n| is zero).
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      size_t n) {
  BN_ULONG cy = 0;
  size_t i = 0;

  // Main loop, unrolled four words at a time.
  for (; n - i >= 4; i += 4) {
    r[i] = bn_add_with_carry(a[i], b[i], cy, &cy);
    r[i + 1] = bn_add_with_carry(a[i + 1], b[i + 1], cy, &cy);
    r[i + 2] = bn_add_with_carry(a[i + 2], b[i + 2], cy, &cy);
    r[i + 3] = bn_add_with_carry(a[i + 3], b[i + 3], cy, &cy);
  }

  // Up to three leftover words.
  for (; i < n; i++) {
    r[i] = bn_add_with_carry(a[i], b[i], cy, &cy);
  }

  return cy;
}
627 
// bn_sub_words sets r[i] to a[i] - b[i] minus the borrow from the previous
// word, for each of the |n| words, and returns the borrow out of the last word
// (zero when |n| is zero).
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      size_t n) {
  BN_ULONG bw = 0;
  size_t i = 0;

  // Main loop, unrolled four words at a time.
  for (; n - i >= 4; i += 4) {
    r[i] = bn_sub_with_borrow(a[i], b[i], bw, &bw);
    r[i + 1] = bn_sub_with_borrow(a[i + 1], b[i + 1], bw, &bw);
    r[i + 2] = bn_sub_with_borrow(a[i + 2], b[i + 2], bw, &bw);
    r[i + 3] = bn_sub_with_borrow(a[i + 3], b[i + 3], bw, &bw);
  }

  // Up to three leftover words.
  for (; i < n; i++) {
    r[i] = bn_sub_with_borrow(a[i], b[i], bw, &bw);
  }

  return bw;
}
654 
655 #endif  // !BN_ADD_ASM
656