1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2 * All rights reserved.
3 *
4 * This package is an SSL implementation written
5 * by Eric Young (eay@cryptsoft.com).
6 * The implementation was written so as to conform with Netscapes SSL.
7 *
8 * This library is free for commercial and non-commercial use as long as
9 * the following conditions are aheared to. The following conditions
10 * apply to all code found in this distribution, be it the RC4, RSA,
11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
12 * included with this distribution is covered by the same copyright terms
13 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14 *
15 * Copyright remains Eric Young's, and as such any Copyright notices in
16 * the code are not to be removed.
17 * If this package is used in a product, Eric Young should be given attribution
18 * as the author of the parts of the library used.
19 * This can be in the form of a textual message at program startup or
20 * in documentation (online or textual) provided with the package.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * 3. All advertising materials mentioning features or use of this software
31 * must display the following acknowledgement:
32 * "This product includes cryptographic software written by
33 * Eric Young (eay@cryptsoft.com)"
34 * The word 'cryptographic' can be left out if the rouines from the library
35 * being used are not cryptographic related :-).
36 * 4. If you include any Windows specific code (or a derivative thereof) from
37 * the apps directory (application code) you must include an acknowledgement:
38 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * The licence and distribution terms for any publically available version or
53 * derivative of this code cannot be changed. i.e. this code cannot simply be
54 * copied and put under another distribution licence
55 * [including the GNU Public Licence.] */
56
57 #include <openssl/bn.h>
58
59 #include <assert.h>
60
61 #include "internal.h"
62
63
64 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)
65 // See asm/bn-586.pl.
66 #define BN_ADD_ASM
67 #define BN_MUL_ASM
68 #endif
69
70 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
71 (defined(__GNUC__) || defined(__clang__))
72 // See asm/x86_64-gcc.c
73 #define BN_ADD_ASM
74 #define BN_MUL_ASM
75 #endif
76
77 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64)
78 // See asm/bn-armv8.pl.
79 #define BN_ADD_ASM
80 #endif
81
82 #if !defined(BN_MUL_ASM)
83
84 #ifdef BN_ULLONG
// mul_add(r, a, w, c) evaluates a * w + r + c in a BN_ULLONG, then stores
// Lw() of the result back into |r| and Hw() of it into |c|. |r| and |c| must
// be lvalues; both are read and then overwritten. Assuming BN_ULLONG is twice
// as wide as a word, the sum cannot wrap because
// (2^n - 1)^2 + 2 * (2^n - 1) = 2^(2n) - 1.
#define mul_add(r, a, w, c)                               \
  do {                                                    \
    const BN_ULLONG t = (BN_ULLONG)(w) * (a) + (r) + (c); \
    (r) = Lw(t);                                          \
    (c) = Hw(t);                                          \
  } while (0)
92
// mul(r, a, w, c) evaluates a * w + c in a BN_ULLONG, storing Lw() of the
// result in |r| and Hw() of it in |c|. |r| is write-only; |c| is read as the
// incoming carry and overwritten with the outgoing one.
#define mul(r, a, w, c)                             \
  do {                                              \
    const BN_ULLONG t = (BN_ULLONG)(w) * (a) + (c); \
    (r) = Lw(t);                                    \
    (c) = Hw(t);                                    \
  } while (0)
100
// sqr(r0, r1, a) squares |a| in a BN_ULLONG and stores Lw() of the square in
// |r0| and Hw() of it in |r1|. Note that |a| is evaluated twice.
#define sqr(r0, r1, a)                        \
  do {                                        \
    const BN_ULLONG t = (BN_ULLONG)(a) * (a); \
    (r0) = Lw(t);                             \
    (r1) = Hw(t);                             \
  } while (0)
108
109 #else
110
// mul_add(r, a, w, c) computes a * w + r + c using only word-sized
// arithmetic. The low word of the result is stored in |r| and the high word
// in |c|. Carries out of the two low-word additions are detected by checking
// whether the sum wrapped below the value just added.
#define mul_add(r, a, w, c)                        \
  do {                                             \
    BN_ULONG mult = (a);                           \
    BN_ULONG sum = (r);                            \
    BN_ULONG prod_hi, prod_lo;                     \
    BN_UMULT_LOHI(prod_lo, prod_hi, w, mult);      \
    sum += (c);                                    \
    /* carry from r + c, folded into the high */   \
    (c) = prod_hi + ((sum < (c)) ? 1 : 0);         \
    sum += prod_lo;                                \
    /* carry from adding the product's low word */ \
    (c) += (sum < prod_lo) ? 1 : 0;                \
    (r) = sum;                                     \
  } while (0)
123
// mul(r, a, w, c) computes a * w + c using only word-sized arithmetic. The
// low word of the result is stored in |r| and the high word, including the
// carry out of the low-word addition, in |c|.
#define mul(r, a, w, c)                          \
  do {                                           \
    BN_ULONG mult = (a);                         \
    BN_ULONG prod_hi, prod_lo, sum;              \
    BN_UMULT_LOHI(prod_lo, prod_hi, w, mult);    \
    sum = prod_lo + (c);                         \
    (c) = prod_hi + ((sum < prod_lo) ? 1 : 0);   \
    (r) = sum;                                   \
  } while (0)
133
// sqr(r0, r1, a) squares |a| with BN_UMULT_LOHI, which receives |r0| as its
// low output and |r1| as its high output. |a| is copied to a local first, so
// it is evaluated exactly once.
#define sqr(r0, r1, a)                        \
  do {                                        \
    BN_ULONG operand = (a);                   \
    BN_UMULT_LOHI(r0, r1, operand, operand);  \
  } while (0)
139
140 #endif // !BN_ULLONG
141
// bn_mul_add_words multiplies each of the |num| words of |ap| by |w| and adds
// the product into the matching word of |rp|, propagating the carry from one
// word to the next. It returns the carry word left over after the last
// position (zero when |num| is zero).
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
                          BN_ULONG w) {
  BN_ULONG carry = 0;
  size_t i = 0;

  // Handle four words per iteration for as long as four or more remain.
  for (; num - i >= 4; i += 4) {
    mul_add(rp[i], ap[i], w, carry);
    mul_add(rp[i + 1], ap[i + 1], w, carry);
    mul_add(rp[i + 2], ap[i + 2], w, carry);
    mul_add(rp[i + 3], ap[i + 3], w, carry);
  }

  // Finish the zero to three trailing words one at a time.
  for (; i < num; i++) {
    mul_add(rp[i], ap[i], w, carry);
  }

  return carry;
}
169
// bn_mul_words multiplies each of the |num| words of |ap| by |w| and writes
// the result to the matching word of |rp|, feeding the high word of every
// product into the next position as a carry. It returns the carry word left
// over after the last position (zero when |num| is zero).
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
                      BN_ULONG w) {
  BN_ULONG carry = 0;
  size_t i = 0;

  // Handle four words per iteration for as long as four or more remain.
  for (; num - i >= 4; i += 4) {
    mul(rp[i], ap[i], w, carry);
    mul(rp[i + 1], ap[i + 1], w, carry);
    mul(rp[i + 2], ap[i + 2], w, carry);
    mul(rp[i + 3], ap[i + 3], w, carry);
  }

  // Finish the zero to three trailing words one at a time.
  for (; i < num; i++) {
    mul(rp[i], ap[i], w, carry);
  }

  return carry;
}
195
// bn_sqr_words squares each of the |n| words of |a| independently. The
// double-word square of a[i] is written to r[2*i] (low word) and r[2*i + 1]
// (high word), so |r| must have room for 2 * |n| words. No carries pass
// between positions.
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
  size_t i = 0;

  // Handle four input words (eight output words) per iteration.
  for (; n - i >= 4; i += 4) {
    BN_ULONG *out = r + 2 * i;
    sqr(out[0], out[1], a[i]);
    sqr(out[2], out[3], a[i + 1]);
    sqr(out[4], out[5], a[i + 2]);
    sqr(out[6], out[7], a[i + 3]);
  }

  // Finish the zero to three trailing words one at a time.
  for (; i < n; i++) {
    sqr(r[2 * i], r[2 * i + 1], a[i]);
  }
}
217
218 // mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
219 // mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
220 // sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
222
223 #ifdef BN_ULLONG
224
// In the BN_ULLONG macros below, adding a single word to a double-word
// product can never overflow, because the high half of a product of two words
// cannot be all-ones.
//
// mul_add_c(a, b, c0, c1, c2) adds a * b to the three-word accumulator
// (c2, c1, c0). |c0|, |c1| and |c2| must be lvalues.
#define mul_add_c(a, b, c0, c1, c2)                         \
  do {                                                      \
    BN_ULLONG prod = (BN_ULLONG)(a) * (b);                  \
    BN_ULONG top;                                           \
    prod += (c0); /* cannot carry out of the double word */ \
    (c0) = (BN_ULONG)Lw(prod);                              \
    top = (BN_ULONG)Hw(prod);                               \
    (c1) += top;                                            \
    (c2) += ((c1) < top) ? 1 : 0; /* carry out of c1 */     \
  } while (0)
237
// mul_add_c2(a, b, c0, c1, c2) adds 2 * a * b to the three-word accumulator
// (c2, c1, c0). The product is computed once and accumulated twice rather
// than doubled, so no bit is lost off the top of the double word.
#define mul_add_c2(a, b, c0, c1, c2)                        \
  do {                                                      \
    const BN_ULLONG prod = (BN_ULLONG)(a) * (b);            \
    BN_ULLONG acc = prod + (c0); /* no carry */             \
    BN_ULONG top = (BN_ULONG)Hw(acc);                       \
    /* first accumulation of the product */                 \
    (c0) = (BN_ULONG)Lw(acc);                               \
    (c1) += top;                                            \
    (c2) += ((c1) < top) ? 1 : 0;                           \
    /* second accumulation, on top of the updated c0 */     \
    acc = prod + (c0); /* no carry */                       \
    (c0) = (BN_ULONG)Lw(acc);                               \
    top = (BN_ULONG)Hw(acc);                                \
    (c1) += top;                                            \
    (c2) += ((c1) < top) ? 1 : 0;                           \
  } while (0)
253
// sqr_add_c(a, i, c0, c1, c2) adds a[i]^2 to the three-word accumulator
// (c2, c1, c0). |a| is indexed with |i| twice.
#define sqr_add_c(a, i, c0, c1, c2)                         \
  do {                                                      \
    BN_ULLONG prod = (BN_ULLONG)(a)[i] * (a)[i];            \
    BN_ULONG top;                                           \
    prod += (c0); /* cannot carry out of the double word */ \
    (c0) = (BN_ULONG)Lw(prod);                              \
    top = (BN_ULONG)Hw(prod);                               \
    (c1) += top;                                            \
    (c2) += ((c1) < top) ? 1 : 0; /* carry out of c1 */     \
  } while (0)
264
// sqr_add_c2(a, i, j, c0, c1, c2) adds 2 * a[i] * a[j] to the three-word
// accumulator (c2, c1, c0) by forwarding the two elements to mul_add_c2.
#define sqr_add_c2(a, i, j, c0, c1, c2) \
  mul_add_c2((a)[i], (a)[j], c0, c1, c2)
266
267 #else
268
// In the word-only macros below, adding a carry bit to the high word of a
// product can never overflow, because the high word of a product of two words
// cannot be all-ones.
//
// mul_add_c(a, b, c0, c1, c2) adds a * b to the three-word accumulator
// (c2, c1, c0). |c0|, |c1| and |c2| must be lvalues. |a| and |b| are each
// evaluated once.
#define mul_add_c(a, b, c0, c1, c2)                   \
  do {                                                \
    BN_ULONG lhs = (a);                               \
    BN_ULONG rhs = (b);                               \
    BN_ULONG prod_lo, prod_hi;                        \
    BN_UMULT_LOHI(prod_lo, prod_hi, lhs, rhs);        \
    (c0) += prod_lo;                                  \
    /* carry out of c0 goes into the high word */     \
    prod_hi += ((c0) < prod_lo) ? 1 : 0;              \
    (c1) += prod_hi;                                  \
    /* carry out of c1 goes into c2 */                \
    (c2) += ((c1) < prod_hi) ? 1 : 0;                 \
  } while (0)
281
// mul_add_c2(a, b, c0, c1, c2) adds 2 * a * b to the three-word accumulator
// (c2, c1, c0). The product is computed once with BN_UMULT_LOHI and then
// accumulated twice; doubling it up front could lose the top bit. |c0|, |c1|
// and |c2| must be lvalues. |a| and |b| are each evaluated once.
#define mul_add_c2(a, b, c0, c1, c2)                 \
  do {                                               \
    BN_ULONG ta = (a), tb = (b);                     \
    BN_ULONG lo, hi, tt;                             \
    BN_UMULT_LOHI(lo, hi, ta, tb);                   \
    /* first accumulation; |hi| is left untouched */ \
    (c0) += lo;                                      \
    tt = hi + (((c0) < lo) ? 1 : 0);                 \
    (c1) += tt;                                      \
    (c2) += ((c1) < tt) ? 1 : 0;                     \
    /* second accumulation */                        \
    (c0) += lo;                                      \
    hi += ((c0) < lo) ? 1 : 0;                       \
    (c1) += hi;                                      \
    (c2) += ((c1) < hi) ? 1 : 0;                     \
  } while (0)
296
// sqr_add_c(a, i, c0, c1, c2) adds a[i]^2 to the three-word accumulator
// (c2, c1, c0). |c0|, |c1| and |c2| must be lvalues. a[i] is read once.
#define sqr_add_c(a, i, c0, c1, c2)                 \
  do {                                              \
    BN_ULONG ta = (a)[i];                           \
    BN_ULONG lo, hi;                                \
    BN_UMULT_LOHI(lo, hi, ta, ta);                  \
    (c0) += lo;                                     \
    /* carry out of c0 goes into the high word */   \
    hi += ((c0) < lo) ? 1 : 0;                      \
    (c1) += hi;                                     \
    /* carry out of c1 goes into c2 */              \
    (c2) += ((c1) < hi) ? 1 : 0;                    \
  } while (0)
307
// sqr_add_c2(a, i, j, c0, c1, c2) adds 2 * a[i] * a[j] to the three-word
// accumulator (c2, c1, c0) by forwarding the two elements to mul_add_c2.
#define sqr_add_c2(a, i, j, c0, c1, c2) \
  mul_add_c2((a)[i], (a)[j], c0, c1, c2)
309
310 #endif // !BN_ULLONG
311
// bn_mul_comba8 writes the 16-word product of the 8-word inputs |a| and |b|
// to |r|. It works column by column: for output word k, every a[i] * b[k - i]
// is added into a three-word accumulator, the lowest accumulator word is
// stored as r[k], and the remaining two words carry into column k + 1. The
// three accumulator variables swap roles each column instead of being shifted.
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
  BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;

  // Column 0.
  mul_add_c(a[0], b[0], acc0, acc1, acc2);
  r[0] = acc0;
  acc0 = 0;

  // Column 1.
  mul_add_c(a[0], b[1], acc1, acc2, acc0);
  mul_add_c(a[1], b[0], acc1, acc2, acc0);
  r[1] = acc1;
  acc1 = 0;

  // Column 2.
  mul_add_c(a[2], b[0], acc2, acc0, acc1);
  mul_add_c(a[1], b[1], acc2, acc0, acc1);
  mul_add_c(a[0], b[2], acc2, acc0, acc1);
  r[2] = acc2;
  acc2 = 0;

  // Column 3.
  mul_add_c(a[0], b[3], acc0, acc1, acc2);
  mul_add_c(a[1], b[2], acc0, acc1, acc2);
  mul_add_c(a[2], b[1], acc0, acc1, acc2);
  mul_add_c(a[3], b[0], acc0, acc1, acc2);
  r[3] = acc0;
  acc0 = 0;

  // Column 4.
  mul_add_c(a[4], b[0], acc1, acc2, acc0);
  mul_add_c(a[3], b[1], acc1, acc2, acc0);
  mul_add_c(a[2], b[2], acc1, acc2, acc0);
  mul_add_c(a[1], b[3], acc1, acc2, acc0);
  mul_add_c(a[0], b[4], acc1, acc2, acc0);
  r[4] = acc1;
  acc1 = 0;

  // Column 5.
  mul_add_c(a[0], b[5], acc2, acc0, acc1);
  mul_add_c(a[1], b[4], acc2, acc0, acc1);
  mul_add_c(a[2], b[3], acc2, acc0, acc1);
  mul_add_c(a[3], b[2], acc2, acc0, acc1);
  mul_add_c(a[4], b[1], acc2, acc0, acc1);
  mul_add_c(a[5], b[0], acc2, acc0, acc1);
  r[5] = acc2;
  acc2 = 0;

  // Column 6.
  mul_add_c(a[6], b[0], acc0, acc1, acc2);
  mul_add_c(a[5], b[1], acc0, acc1, acc2);
  mul_add_c(a[4], b[2], acc0, acc1, acc2);
  mul_add_c(a[3], b[3], acc0, acc1, acc2);
  mul_add_c(a[2], b[4], acc0, acc1, acc2);
  mul_add_c(a[1], b[5], acc0, acc1, acc2);
  mul_add_c(a[0], b[6], acc0, acc1, acc2);
  r[6] = acc0;
  acc0 = 0;

  // Column 7 (the widest, with all eight cross terms).
  mul_add_c(a[0], b[7], acc1, acc2, acc0);
  mul_add_c(a[1], b[6], acc1, acc2, acc0);
  mul_add_c(a[2], b[5], acc1, acc2, acc0);
  mul_add_c(a[3], b[4], acc1, acc2, acc0);
  mul_add_c(a[4], b[3], acc1, acc2, acc0);
  mul_add_c(a[5], b[2], acc1, acc2, acc0);
  mul_add_c(a[6], b[1], acc1, acc2, acc0);
  mul_add_c(a[7], b[0], acc1, acc2, acc0);
  r[7] = acc1;
  acc1 = 0;

  // Column 8.
  mul_add_c(a[7], b[1], acc2, acc0, acc1);
  mul_add_c(a[6], b[2], acc2, acc0, acc1);
  mul_add_c(a[5], b[3], acc2, acc0, acc1);
  mul_add_c(a[4], b[4], acc2, acc0, acc1);
  mul_add_c(a[3], b[5], acc2, acc0, acc1);
  mul_add_c(a[2], b[6], acc2, acc0, acc1);
  mul_add_c(a[1], b[7], acc2, acc0, acc1);
  r[8] = acc2;
  acc2 = 0;

  // Column 9.
  mul_add_c(a[2], b[7], acc0, acc1, acc2);
  mul_add_c(a[3], b[6], acc0, acc1, acc2);
  mul_add_c(a[4], b[5], acc0, acc1, acc2);
  mul_add_c(a[5], b[4], acc0, acc1, acc2);
  mul_add_c(a[6], b[3], acc0, acc1, acc2);
  mul_add_c(a[7], b[2], acc0, acc1, acc2);
  r[9] = acc0;
  acc0 = 0;

  // Column 10.
  mul_add_c(a[7], b[3], acc1, acc2, acc0);
  mul_add_c(a[6], b[4], acc1, acc2, acc0);
  mul_add_c(a[5], b[5], acc1, acc2, acc0);
  mul_add_c(a[4], b[6], acc1, acc2, acc0);
  mul_add_c(a[3], b[7], acc1, acc2, acc0);
  r[10] = acc1;
  acc1 = 0;

  // Column 11.
  mul_add_c(a[4], b[7], acc2, acc0, acc1);
  mul_add_c(a[5], b[6], acc2, acc0, acc1);
  mul_add_c(a[6], b[5], acc2, acc0, acc1);
  mul_add_c(a[7], b[4], acc2, acc0, acc1);
  r[11] = acc2;
  acc2 = 0;

  // Column 12.
  mul_add_c(a[7], b[5], acc0, acc1, acc2);
  mul_add_c(a[6], b[6], acc0, acc1, acc2);
  mul_add_c(a[5], b[7], acc0, acc1, acc2);
  r[12] = acc0;
  acc0 = 0;

  // Column 13.
  mul_add_c(a[6], b[7], acc1, acc2, acc0);
  mul_add_c(a[7], b[6], acc1, acc2, acc0);
  r[13] = acc1;
  acc1 = 0;

  // Column 14; its carry word is the top word of the product.
  mul_add_c(a[7], b[7], acc2, acc0, acc1);
  r[14] = acc2;
  r[15] = acc0;
}
413
// bn_mul_comba4 writes the 8-word product of the 4-word inputs |a| and |b| to
// |r|. It works column by column: for output word k, every a[i] * b[k - i] is
// added into a three-word accumulator, the lowest accumulator word is stored
// as r[k], and the remaining two words carry into column k + 1. The three
// accumulator variables swap roles each column instead of being shifted.
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
  BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;

  // Column 0.
  mul_add_c(a[0], b[0], acc0, acc1, acc2);
  r[0] = acc0;
  acc0 = 0;

  // Column 1.
  mul_add_c(a[0], b[1], acc1, acc2, acc0);
  mul_add_c(a[1], b[0], acc1, acc2, acc0);
  r[1] = acc1;
  acc1 = 0;

  // Column 2.
  mul_add_c(a[2], b[0], acc2, acc0, acc1);
  mul_add_c(a[1], b[1], acc2, acc0, acc1);
  mul_add_c(a[0], b[2], acc2, acc0, acc1);
  r[2] = acc2;
  acc2 = 0;

  // Column 3 (the widest, with all four cross terms).
  mul_add_c(a[0], b[3], acc0, acc1, acc2);
  mul_add_c(a[1], b[2], acc0, acc1, acc2);
  mul_add_c(a[2], b[1], acc0, acc1, acc2);
  mul_add_c(a[3], b[0], acc0, acc1, acc2);
  r[3] = acc0;
  acc0 = 0;

  // Column 4.
  mul_add_c(a[3], b[1], acc1, acc2, acc0);
  mul_add_c(a[2], b[2], acc1, acc2, acc0);
  mul_add_c(a[1], b[3], acc1, acc2, acc0);
  r[4] = acc1;
  acc1 = 0;

  // Column 5.
  mul_add_c(a[2], b[3], acc2, acc0, acc1);
  mul_add_c(a[3], b[2], acc2, acc0, acc1);
  r[5] = acc2;
  acc2 = 0;

  // Column 6; its carry word is the top word of the product.
  mul_add_c(a[3], b[3], acc0, acc1, acc2);
  r[6] = acc0;
  r[7] = acc1;
}
451
// bn_sqr_comba8 writes the 16-word square of the 8-word input |a| to |r|. It
// works column by column like the comba multiply, but each cross product
// a[i] * a[j] with i != j is added once, doubled, via sqr_add_c2, and each
// diagonal term a[i]^2 is added via sqr_add_c. The three accumulator
// variables swap roles each column instead of being shifted.
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
  BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;

  // Column 0.
  sqr_add_c(a, 0, acc0, acc1, acc2);
  r[0] = acc0;
  acc0 = 0;

  // Column 1.
  sqr_add_c2(a, 1, 0, acc1, acc2, acc0);
  r[1] = acc1;
  acc1 = 0;

  // Column 2.
  sqr_add_c(a, 1, acc2, acc0, acc1);
  sqr_add_c2(a, 2, 0, acc2, acc0, acc1);
  r[2] = acc2;
  acc2 = 0;

  // Column 3.
  sqr_add_c2(a, 3, 0, acc0, acc1, acc2);
  sqr_add_c2(a, 2, 1, acc0, acc1, acc2);
  r[3] = acc0;
  acc0 = 0;

  // Column 4.
  sqr_add_c(a, 2, acc1, acc2, acc0);
  sqr_add_c2(a, 3, 1, acc1, acc2, acc0);
  sqr_add_c2(a, 4, 0, acc1, acc2, acc0);
  r[4] = acc1;
  acc1 = 0;

  // Column 5.
  sqr_add_c2(a, 5, 0, acc2, acc0, acc1);
  sqr_add_c2(a, 4, 1, acc2, acc0, acc1);
  sqr_add_c2(a, 3, 2, acc2, acc0, acc1);
  r[5] = acc2;
  acc2 = 0;

  // Column 6.
  sqr_add_c(a, 3, acc0, acc1, acc2);
  sqr_add_c2(a, 4, 2, acc0, acc1, acc2);
  sqr_add_c2(a, 5, 1, acc0, acc1, acc2);
  sqr_add_c2(a, 6, 0, acc0, acc1, acc2);
  r[6] = acc0;
  acc0 = 0;

  // Column 7.
  sqr_add_c2(a, 7, 0, acc1, acc2, acc0);
  sqr_add_c2(a, 6, 1, acc1, acc2, acc0);
  sqr_add_c2(a, 5, 2, acc1, acc2, acc0);
  sqr_add_c2(a, 4, 3, acc1, acc2, acc0);
  r[7] = acc1;
  acc1 = 0;

  // Column 8.
  sqr_add_c(a, 4, acc2, acc0, acc1);
  sqr_add_c2(a, 5, 3, acc2, acc0, acc1);
  sqr_add_c2(a, 6, 2, acc2, acc0, acc1);
  sqr_add_c2(a, 7, 1, acc2, acc0, acc1);
  r[8] = acc2;
  acc2 = 0;

  // Column 9.
  sqr_add_c2(a, 7, 2, acc0, acc1, acc2);
  sqr_add_c2(a, 6, 3, acc0, acc1, acc2);
  sqr_add_c2(a, 5, 4, acc0, acc1, acc2);
  r[9] = acc0;
  acc0 = 0;

  // Column 10.
  sqr_add_c(a, 5, acc1, acc2, acc0);
  sqr_add_c2(a, 6, 4, acc1, acc2, acc0);
  sqr_add_c2(a, 7, 3, acc1, acc2, acc0);
  r[10] = acc1;
  acc1 = 0;

  // Column 11.
  sqr_add_c2(a, 7, 4, acc2, acc0, acc1);
  sqr_add_c2(a, 6, 5, acc2, acc0, acc1);
  r[11] = acc2;
  acc2 = 0;

  // Column 12.
  sqr_add_c(a, 6, acc0, acc1, acc2);
  sqr_add_c2(a, 7, 5, acc0, acc1, acc2);
  r[12] = acc0;
  acc0 = 0;

  // Column 13.
  sqr_add_c2(a, 7, 6, acc1, acc2, acc0);
  r[13] = acc1;
  acc1 = 0;

  // Column 14; its carry word is the top word of the square.
  sqr_add_c(a, 7, acc2, acc0, acc1);
  r[14] = acc2;
  r[15] = acc0;
}
525
// bn_sqr_comba4 writes the 8-word square of the 4-word input |a| to |r|. It
// works column by column like the comba multiply, but each cross product
// a[i] * a[j] with i != j is added once, doubled, via sqr_add_c2, and each
// diagonal term a[i]^2 is added via sqr_add_c. The three accumulator
// variables swap roles each column instead of being shifted.
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
  BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;

  // Column 0.
  sqr_add_c(a, 0, acc0, acc1, acc2);
  r[0] = acc0;
  acc0 = 0;

  // Column 1.
  sqr_add_c2(a, 1, 0, acc1, acc2, acc0);
  r[1] = acc1;
  acc1 = 0;

  // Column 2.
  sqr_add_c(a, 1, acc2, acc0, acc1);
  sqr_add_c2(a, 2, 0, acc2, acc0, acc1);
  r[2] = acc2;
  acc2 = 0;

  // Column 3.
  sqr_add_c2(a, 3, 0, acc0, acc1, acc2);
  sqr_add_c2(a, 2, 1, acc0, acc1, acc2);
  r[3] = acc0;
  acc0 = 0;

  // Column 4.
  sqr_add_c(a, 2, acc1, acc2, acc0);
  sqr_add_c2(a, 3, 1, acc1, acc2, acc0);
  r[4] = acc1;
  acc1 = 0;

  // Column 5.
  sqr_add_c2(a, 3, 2, acc2, acc0, acc1);
  r[5] = acc2;
  acc2 = 0;

  // Column 6; its carry word is the top word of the square.
  sqr_add_c(a, 3, acc0, acc1, acc2);
  r[6] = acc0;
  r[7] = acc1;
}
557
558 #undef mul_add
559 #undef mul
560 #undef sqr
561 #undef mul_add_c
562 #undef mul_add_c2
563 #undef sqr_add_c
564 #undef sqr_add_c2
565
566 #endif // !BN_MUL_ASM
567
568 #if !defined(BN_ADD_ASM)
569
570 // bn_add_with_carry returns |x + y + carry|, and sets |*out_carry| to the
571 // carry bit. |carry| must be zero or one.
bn_add_with_carry(BN_ULONG x,BN_ULONG y,BN_ULONG carry,BN_ULONG * out_carry)572 static inline BN_ULONG bn_add_with_carry(BN_ULONG x, BN_ULONG y, BN_ULONG carry,
573 BN_ULONG *out_carry) {
574 assert(carry == 0 || carry == 1);
575 #if defined(BN_ULLONG)
576 BN_ULLONG ret = carry;
577 ret += (BN_ULLONG)x + y;
578 *out_carry = (BN_ULONG)(ret >> BN_BITS2);
579 return (BN_ULONG)ret;
580 #else
581 x += carry;
582 carry = x < carry;
583 BN_ULONG ret = x + y;
584 carry += ret < x;
585 *out_carry = carry;
586 return ret;
587 #endif
588 }
589
590 // bn_sub_with_borrow returns |x - y - borrow|, and sets |*out_borrow| to the
591 // borrow bit. |borrow| must be zero or one.
bn_sub_with_borrow(BN_ULONG x,BN_ULONG y,BN_ULONG borrow,BN_ULONG * out_borrow)592 static inline BN_ULONG bn_sub_with_borrow(BN_ULONG x, BN_ULONG y,
593 BN_ULONG borrow,
594 BN_ULONG *out_borrow) {
595 assert(borrow == 0 || borrow == 1);
596 BN_ULONG ret = x - y - borrow;
597 *out_borrow = (x < y) | ((x == y) & borrow);
598 return ret;
599 }
600
// bn_add_words adds the |n|-word arrays |a| and |b| word by word, from index
// zero upward, writing the sum to |r| and propagating the carry between
// positions. It returns the carry out of the last word (zero when |n| is
// zero).
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      size_t n) {
  BN_ULONG carry = 0;
  size_t i = 0;

  // Handle four words per iteration for as long as four or more remain.
  for (; n - i >= 4; i += 4) {
    r[i] = bn_add_with_carry(a[i], b[i], carry, &carry);
    r[i + 1] = bn_add_with_carry(a[i + 1], b[i + 1], carry, &carry);
    r[i + 2] = bn_add_with_carry(a[i + 2], b[i + 2], carry, &carry);
    r[i + 3] = bn_add_with_carry(a[i + 3], b[i + 3], carry, &carry);
  }

  // Finish the zero to three trailing words one at a time.
  for (; i < n; i++) {
    r[i] = bn_add_with_carry(a[i], b[i], carry, &carry);
  }

  return carry;
}
627
// bn_sub_words subtracts the |n|-word array |b| from |a| word by word, from
// index zero upward, writing the difference to |r| and propagating the borrow
// between positions. It returns the borrow out of the last word (zero when
// |n| is zero).
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      size_t n) {
  BN_ULONG borrow = 0;
  size_t i = 0;

  // Handle four words per iteration for as long as four or more remain.
  for (; n - i >= 4; i += 4) {
    r[i] = bn_sub_with_borrow(a[i], b[i], borrow, &borrow);
    r[i + 1] = bn_sub_with_borrow(a[i + 1], b[i + 1], borrow, &borrow);
    r[i + 2] = bn_sub_with_borrow(a[i + 2], b[i + 2], borrow, &borrow);
    r[i + 3] = bn_sub_with_borrow(a[i + 3], b[i + 3], borrow, &borrow);
  }

  // Finish the zero to three trailing words one at a time.
  for (; i < n; i++) {
    r[i] = bn_sub_with_borrow(a[i], b[i], borrow, &borrow);
  }

  return borrow;
}
654
655 #endif // !BN_ADD_ASM
656