• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2  * All rights reserved.
3  *
4  * This package is an SSL implementation written
5  * by Eric Young (eay@cryptsoft.com).
6  * The implementation was written so as to conform with Netscapes SSL.
7  *
8  * This library is free for commercial and non-commercial use as long as
9  * the following conditions are aheared to.  The following conditions
10  * apply to all code found in this distribution, be it the RC4, RSA,
11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12  * included with this distribution is covered by the same copyright terms
13  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14  *
15  * Copyright remains Eric Young's, and as such any Copyright notices in
16  * the code are not to be removed.
17  * If this package is used in a product, Eric Young should be given attribution
18  * as the author of the parts of the library used.
19  * This can be in the form of a textual message at program startup or
20  * in documentation (online or textual) provided with the package.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  * 3. All advertising materials mentioning features or use of this software
31  *    must display the following acknowledgement:
32  *    "This product includes cryptographic software written by
33  *     Eric Young (eay@cryptsoft.com)"
34  *    The word 'cryptographic' can be left out if the rouines from the library
35  *    being used are not cryptographic related :-).
36  * 4. If you include any Windows specific code (or a derivative thereof) from
37  *    the apps directory (application code) you must include an acknowledgement:
38  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39  *
40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  * The licence and distribution terms for any publically available version or
53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
54  * copied and put under another distribution licence
55  * [including the GNU Public Licence.]
56  */
57 /* ====================================================================
58  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
59  *
60  * Redistribution and use in source and binary forms, with or without
61  * modification, are permitted provided that the following conditions
62  * are met:
63  *
64  * 1. Redistributions of source code must retain the above copyright
65  *    notice, this list of conditions and the following disclaimer.
66  *
67  * 2. Redistributions in binary form must reproduce the above copyright
68  *    notice, this list of conditions and the following disclaimer in
69  *    the documentation and/or other materials provided with the
70  *    distribution.
71  *
72  * 3. All advertising materials mentioning features or use of this
73  *    software must display the following acknowledgment:
74  *    "This product includes software developed by the OpenSSL Project
75  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
76  *
77  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
78  *    endorse or promote products derived from this software without
79  *    prior written permission. For written permission, please contact
80  *    openssl-core@openssl.org.
81  *
82  * 5. Products derived from this software may not be called "OpenSSL"
83  *    nor may "OpenSSL" appear in their names without prior written
84  *    permission of the OpenSSL Project.
85  *
86  * 6. Redistributions of any form whatsoever must retain the following
87  *    acknowledgment:
88  *    "This product includes software developed by the OpenSSL Project
89  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
90  *
91  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
92  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
94  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
95  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
96  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
97  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
98  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
100  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
101  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
102  * OF THE POSSIBILITY OF SUCH DAMAGE.
103  * ====================================================================
104  *
105  * This product includes cryptographic software written by Eric Young
106  * (eay@cryptsoft.com).  This product includes software written by Tim
107  * Hudson (tjh@cryptsoft.com). */
108 
109 #include <openssl/bn.h>
110 
111 #include <assert.h>
112 #include <limits.h>
113 #include <stdlib.h>
114 #include <string.h>
115 
116 #include <openssl/err.h>
117 #include <openssl/mem.h>
118 
119 #include "internal.h"
120 #include "rsaz_exp.h"
121 
122 
BN_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,BN_CTX * ctx)123 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
124   int i, bits, ret = 0;
125   BIGNUM *v, *rr;
126 
127   BN_CTX_start(ctx);
128   if (r == a || r == p) {
129     rr = BN_CTX_get(ctx);
130   } else {
131     rr = r;
132   }
133 
134   v = BN_CTX_get(ctx);
135   if (rr == NULL || v == NULL) {
136     goto err;
137   }
138 
139   if (BN_copy(v, a) == NULL) {
140     goto err;
141   }
142   bits = BN_num_bits(p);
143 
144   if (BN_is_odd(p)) {
145     if (BN_copy(rr, a) == NULL) {
146       goto err;
147     }
148   } else {
149     if (!BN_one(rr)) {
150       goto err;
151     }
152   }
153 
154   for (i = 1; i < bits; i++) {
155     if (!BN_sqr(v, v, ctx)) {
156       goto err;
157     }
158     if (BN_is_bit_set(p, i)) {
159       if (!BN_mul(rr, rr, v, ctx)) {
160         goto err;
161       }
162     }
163   }
164 
165   if (r != rr && !BN_copy(r, rr)) {
166     goto err;
167   }
168   ret = 1;
169 
170 err:
171   BN_CTX_end(ctx);
172   return ret;
173 }
174 
175 typedef struct bn_recp_ctx_st {
176   BIGNUM N;   // the divisor
177   BIGNUM Nr;  // the reciprocal
178   int num_bits;
179   int shift;
180   int flags;
181 } BN_RECP_CTX;
182 
BN_RECP_CTX_init(BN_RECP_CTX * recp)183 static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
184   BN_init(&recp->N);
185   BN_init(&recp->Nr);
186   recp->num_bits = 0;
187   recp->shift = 0;
188   recp->flags = 0;
189 }
190 
BN_RECP_CTX_free(BN_RECP_CTX * recp)191 static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
192   if (recp == NULL) {
193     return;
194   }
195 
196   BN_free(&recp->N);
197   BN_free(&recp->Nr);
198 }
199 
BN_RECP_CTX_set(BN_RECP_CTX * recp,const BIGNUM * d,BN_CTX * ctx)200 static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
201   if (!BN_copy(&(recp->N), d)) {
202     return 0;
203   }
204   BN_zero(&recp->Nr);
205   recp->num_bits = BN_num_bits(d);
206   recp->shift = 0;
207 
208   return 1;
209 }
210 
211 // len is the expected size of the result We actually calculate with an extra
212 // word of precision, so we can do faster division if the remainder is not
213 // required.
214 // r := 2^len / m
BN_reciprocal(BIGNUM * r,const BIGNUM * m,int len,BN_CTX * ctx)215 static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
216   int ret = -1;
217   BIGNUM *t;
218 
219   BN_CTX_start(ctx);
220   t = BN_CTX_get(ctx);
221   if (t == NULL) {
222     goto err;
223   }
224 
225   if (!BN_set_bit(t, len)) {
226     goto err;
227   }
228 
229   if (!BN_div(r, NULL, t, m, ctx)) {
230     goto err;
231   }
232 
233   ret = len;
234 
235 err:
236   BN_CTX_end(ctx);
237   return ret;
238 }
239 
BN_div_recp(BIGNUM * dv,BIGNUM * rem,const BIGNUM * m,BN_RECP_CTX * recp,BN_CTX * ctx)240 static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
241                        BN_RECP_CTX *recp, BN_CTX *ctx) {
242   int i, j, ret = 0;
243   BIGNUM *a, *b, *d, *r;
244 
245   BN_CTX_start(ctx);
246   a = BN_CTX_get(ctx);
247   b = BN_CTX_get(ctx);
248   if (dv != NULL) {
249     d = dv;
250   } else {
251     d = BN_CTX_get(ctx);
252   }
253 
254   if (rem != NULL) {
255     r = rem;
256   } else {
257     r = BN_CTX_get(ctx);
258   }
259 
260   if (a == NULL || b == NULL || d == NULL || r == NULL) {
261     goto err;
262   }
263 
264   if (BN_ucmp(m, &recp->N) < 0) {
265     BN_zero(d);
266     if (!BN_copy(r, m)) {
267       goto err;
268     }
269     BN_CTX_end(ctx);
270     return 1;
271   }
272 
273   // We want the remainder
274   // Given input of ABCDEF / ab
275   // we need multiply ABCDEF by 3 digests of the reciprocal of ab
276 
277   // i := max(BN_num_bits(m), 2*BN_num_bits(N))
278   i = BN_num_bits(m);
279   j = recp->num_bits << 1;
280   if (j > i) {
281     i = j;
282   }
283 
284   // Nr := round(2^i / N)
285   if (i != recp->shift) {
286     recp->shift =
287         BN_reciprocal(&(recp->Nr), &(recp->N), i,
288                       ctx);  // BN_reciprocal returns i, or -1 for an error
289   }
290 
291   if (recp->shift == -1) {
292     goto err;
293   }
294 
295   // d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
296   // BN_num_bits(N)))|
297   //    = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
298   // BN_num_bits(N)))|
299   //   <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
300   //    = |m/N|
301   if (!BN_rshift(a, m, recp->num_bits)) {
302     goto err;
303   }
304   if (!BN_mul(b, a, &(recp->Nr), ctx)) {
305     goto err;
306   }
307   if (!BN_rshift(d, b, i - recp->num_bits)) {
308     goto err;
309   }
310   d->neg = 0;
311 
312   if (!BN_mul(b, &(recp->N), d, ctx)) {
313     goto err;
314   }
315   if (!BN_usub(r, m, b)) {
316     goto err;
317   }
318   r->neg = 0;
319 
320   j = 0;
321   while (BN_ucmp(r, &(recp->N)) >= 0) {
322     if (j++ > 2) {
323       OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL);
324       goto err;
325     }
326     if (!BN_usub(r, r, &(recp->N))) {
327       goto err;
328     }
329     if (!BN_add_word(d, 1)) {
330       goto err;
331     }
332   }
333 
334   r->neg = BN_is_zero(r) ? 0 : m->neg;
335   d->neg = m->neg ^ recp->N.neg;
336   ret = 1;
337 
338 err:
339   BN_CTX_end(ctx);
340   return ret;
341 }
342 
BN_mod_mul_reciprocal(BIGNUM * r,const BIGNUM * x,const BIGNUM * y,BN_RECP_CTX * recp,BN_CTX * ctx)343 static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
344                                  BN_RECP_CTX *recp, BN_CTX *ctx) {
345   int ret = 0;
346   BIGNUM *a;
347   const BIGNUM *ca;
348 
349   BN_CTX_start(ctx);
350   a = BN_CTX_get(ctx);
351   if (a == NULL) {
352     goto err;
353   }
354 
355   if (y != NULL) {
356     if (x == y) {
357       if (!BN_sqr(a, x, ctx)) {
358         goto err;
359       }
360     } else {
361       if (!BN_mul(a, x, y, ctx)) {
362         goto err;
363       }
364     }
365     ca = a;
366   } else {
367     ca = x;  // Just do the mod
368   }
369 
370   ret = BN_div_recp(NULL, r, ca, recp, ctx);
371 
372 err:
373   BN_CTX_end(ctx);
374   return ret;
375 }
376 
377 // BN_window_bits_for_exponent_size returns sliding window size for mod_exp with
378 // a |b| bit exponent.
379 //
380 // For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
381 // multiplications is a constant plus on average
382 //
383 //    2^(w-1) + (b-w)/(w+1);
384 //
385 // here 2^(w-1)  is for precomputing the table (we actually need entries only
386 // for windows that have the lowest bit set), and (b-w)/(w+1)  is an
387 // approximation for the expected number of w-bit windows, not counting the
388 // first one.
389 //
390 // Thus we should use
391 //
392 //    w >= 6  if        b > 671
393 //     w = 5  if  671 > b > 239
394 //     w = 4  if  239 > b >  79
395 //     w = 3  if   79 > b >  23
396 //    w <= 2  if   23 > b
397 //
398 // (with draws in between).  Very small exponents are often selected
399 // with low Hamming weight, so we use  w = 1  for b <= 23.
BN_window_bits_for_exponent_size(size_t b)400 static int BN_window_bits_for_exponent_size(size_t b) {
401   if (b > 671) {
402     return 6;
403   }
404   if (b > 239) {
405     return 5;
406   }
407   if (b > 79) {
408     return 4;
409   }
410   if (b > 23) {
411     return 3;
412   }
413   return 1;
414 }
415 
416 // TABLE_SIZE is the maximum precomputation table size for *variable* sliding
417 // windows. This must be 2^(max_window - 1), where max_window is the largest
418 // value returned from |BN_window_bits_for_exponent_size|.
419 #define TABLE_SIZE 32
420 
421 // TABLE_BITS_SMALL is the smallest value returned from
422 // |BN_window_bits_for_exponent_size| when |b| is at most |BN_BITS2| *
423 // |BN_SMALL_MAX_WORDS| words.
424 #define TABLE_BITS_SMALL 5
425 
426 // TABLE_SIZE_SMALL is the same as |TABLE_SIZE|, but when |b| is at most
427 // |BN_BITS2| * |BN_SMALL_MAX_WORDS|.
428 #define TABLE_SIZE_SMALL (1 << (TABLE_BITS_SMALL - 1))
429 
mod_exp_recp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)430 static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
431                         const BIGNUM *m, BN_CTX *ctx) {
432   int i, j, ret = 0, wstart, window;
433   int start = 1;
434   BIGNUM *aa;
435   // Table of variables obtained from 'ctx'
436   BIGNUM *val[TABLE_SIZE];
437   BN_RECP_CTX recp;
438 
439   // This function is only called on even moduli.
440   assert(!BN_is_odd(m));
441 
442   int bits = BN_num_bits(p);
443   if (bits == 0) {
444     return BN_one(r);
445   }
446 
447   BN_RECP_CTX_init(&recp);
448   BN_CTX_start(ctx);
449   aa = BN_CTX_get(ctx);
450   val[0] = BN_CTX_get(ctx);
451   if (!aa || !val[0]) {
452     goto err;
453   }
454 
455   if (m->neg) {
456     // ignore sign of 'm'
457     if (!BN_copy(aa, m)) {
458       goto err;
459     }
460     aa->neg = 0;
461     if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
462       goto err;
463     }
464   } else {
465     if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
466       goto err;
467     }
468   }
469 
470   if (!BN_nnmod(val[0], a, m, ctx)) {
471     goto err;  // 1
472   }
473   if (BN_is_zero(val[0])) {
474     BN_zero(r);
475     ret = 1;
476     goto err;
477   }
478 
479   window = BN_window_bits_for_exponent_size(bits);
480   if (window > 1) {
481     if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
482       goto err;  // 2
483     }
484     j = 1 << (window - 1);
485     for (i = 1; i < j; i++) {
486       if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
487           !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
488         goto err;
489       }
490     }
491   }
492 
493   start = 1;  // This is used to avoid multiplication etc
494               // when there is only the value '1' in the
495               // buffer.
496   wstart = bits - 1;  // The top bit of the window
497 
498   if (!BN_one(r)) {
499     goto err;
500   }
501 
502   for (;;) {
503     int wvalue;  // The 'value' of the window
504     int wend;  // The bottom bit of the window
505 
506     if (!BN_is_bit_set(p, wstart)) {
507       if (!start) {
508         if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
509           goto err;
510         }
511       }
512       if (wstart == 0) {
513         break;
514       }
515       wstart--;
516       continue;
517     }
518 
519     // We now have wstart on a 'set' bit, we now need to work out
520     // how bit a window to do.  To do this we need to scan
521     // forward until the last set bit before the end of the
522     // window
523     wvalue = 1;
524     wend = 0;
525     for (i = 1; i < window; i++) {
526       if (wstart - i < 0) {
527         break;
528       }
529       if (BN_is_bit_set(p, wstart - i)) {
530         wvalue <<= (i - wend);
531         wvalue |= 1;
532         wend = i;
533       }
534     }
535 
536     // wend is the size of the current window
537     j = wend + 1;
538     // add the 'bytes above'
539     if (!start) {
540       for (i = 0; i < j; i++) {
541         if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
542           goto err;
543         }
544       }
545     }
546 
547     // wvalue will be an odd number < 2^window
548     if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
549       goto err;
550     }
551 
552     // move the 'window' down further
553     wstart -= wend + 1;
554     start = 0;
555     if (wstart < 0) {
556       break;
557     }
558   }
559   ret = 1;
560 
561 err:
562   BN_CTX_end(ctx);
563   BN_RECP_CTX_free(&recp);
564   return ret;
565 }
566 
BN_mod_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)567 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
568                BN_CTX *ctx) {
569   if (m->neg) {
570     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
571     return 0;
572   }
573   if (a->neg || BN_ucmp(a, m) >= 0) {
574     if (!BN_nnmod(r, a, m, ctx)) {
575       return 0;
576     }
577     a = r;
578   }
579 
580   if (BN_is_odd(m)) {
581     return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
582   }
583 
584   return mod_exp_recp(r, a, p, m, ctx);
585 }
586 
BN_mod_exp_mont(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,const BN_MONT_CTX * mont)587 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
588                     const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) {
589   if (!BN_is_odd(m)) {
590     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
591     return 0;
592   }
593   if (m->neg) {
594     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
595     return 0;
596   }
597   // |a| is secret, but |a < m| is not.
598   if (a->neg || constant_time_declassify_int(BN_ucmp(a, m)) >= 0) {
599     OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
600     return 0;
601   }
602 
603   int bits = BN_num_bits(p);
604   if (bits == 0) {
605     // x**0 mod 1 is still zero.
606     if (BN_abs_is_word(m, 1)) {
607       BN_zero(rr);
608       return 1;
609     }
610     return BN_one(rr);
611   }
612 
613   int ret = 0;
614   BIGNUM *val[TABLE_SIZE];
615   BN_MONT_CTX *new_mont = NULL;
616 
617   BN_CTX_start(ctx);
618   BIGNUM *r = BN_CTX_get(ctx);
619   val[0] = BN_CTX_get(ctx);
620   if (r == NULL || val[0] == NULL) {
621     goto err;
622   }
623 
624   // Allocate a montgomery context if it was not supplied by the caller.
625   if (mont == NULL) {
626     new_mont = BN_MONT_CTX_new_consttime(m, ctx);
627     if (new_mont == NULL) {
628       goto err;
629     }
630     mont = new_mont;
631   }
632 
633   // We exponentiate by looking at sliding windows of the exponent and
634   // precomputing powers of |a|. Windows may be shifted so they always end on a
635   // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1)
636   // for i = 0 to 2^(window-1), all in Montgomery form.
637   int window = BN_window_bits_for_exponent_size(bits);
638   if (!BN_to_montgomery(val[0], a, mont, ctx)) {
639     goto err;
640   }
641   if (window > 1) {
642     BIGNUM *d = BN_CTX_get(ctx);
643     if (d == NULL ||
644         !BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
645       goto err;
646     }
647     for (int i = 1; i < 1 << (window - 1); i++) {
648       val[i] = BN_CTX_get(ctx);
649       if (val[i] == NULL ||
650           !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
651         goto err;
652       }
653     }
654   }
655 
656   // |p| is non-zero, so at least one window is non-zero. To save some
657   // multiplications, defer initializing |r| until then.
658   int r_is_one = 1;
659   int wstart = bits - 1;  // The top bit of the window.
660   for (;;) {
661     if (!BN_is_bit_set(p, wstart)) {
662       if (!r_is_one && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
663         goto err;
664       }
665       if (wstart == 0) {
666         break;
667       }
668       wstart--;
669       continue;
670     }
671 
672     // We now have wstart on a set bit. Find the largest window we can use.
673     int wvalue = 1;
674     int wsize = 0;
675     for (int i = 1; i < window && i <= wstart; i++) {
676       if (BN_is_bit_set(p, wstart - i)) {
677         wvalue <<= (i - wsize);
678         wvalue |= 1;
679         wsize = i;
680       }
681     }
682 
683     // Shift |r| to the end of the window.
684     if (!r_is_one) {
685       for (int i = 0; i < wsize + 1; i++) {
686         if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
687           goto err;
688         }
689       }
690     }
691 
692     assert(wvalue & 1);
693     assert(wvalue < (1 << window));
694     if (r_is_one) {
695       if (!BN_copy(r, val[wvalue >> 1])) {
696         goto err;
697       }
698     } else if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
699       goto err;
700     }
701 
702     r_is_one = 0;
703     if (wstart == wsize) {
704       break;
705     }
706     wstart -= wsize + 1;
707   }
708 
709   // |p| is non-zero, so |r_is_one| must be cleared at some point.
710   assert(!r_is_one);
711 
712   if (!BN_from_montgomery(rr, r, mont, ctx)) {
713     goto err;
714   }
715   ret = 1;
716 
717 err:
718   BN_MONT_CTX_free(new_mont);
719   BN_CTX_end(ctx);
720   return ret;
721 }
722 
bn_mod_exp_mont_small(BN_ULONG * r,const BN_ULONG * a,size_t num,const BN_ULONG * p,size_t num_p,const BN_MONT_CTX * mont)723 void bn_mod_exp_mont_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
724                            const BN_ULONG *p, size_t num_p,
725                            const BN_MONT_CTX *mont) {
726   if (num != (size_t)mont->N.width || num > BN_SMALL_MAX_WORDS ||
727       num_p > SIZE_MAX / BN_BITS2) {
728     abort();
729   }
730   assert(BN_is_odd(&mont->N));
731 
732   // Count the number of bits in |p|, skipping leading zeros. Note this function
733   // treats |p| as public.
734   while (num_p != 0 && p[num_p - 1] == 0) {
735     num_p--;
736   }
737   if (num_p == 0) {
738     bn_from_montgomery_small(r, num, mont->RR.d, num, mont);
739     return;
740   }
741   size_t bits = BN_num_bits_word(p[num_p - 1]) + (num_p - 1) * BN_BITS2;
742   assert(bits != 0);
743 
744   // We exponentiate by looking at sliding windows of the exponent and
745   // precomputing powers of |a|. Windows may be shifted so they always end on a
746   // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1) for
747   // i = 0 to 2^(window-1), all in Montgomery form.
748   unsigned window = BN_window_bits_for_exponent_size(bits);
749   if (window > TABLE_BITS_SMALL) {
750     window = TABLE_BITS_SMALL;  // Tolerate excessively large |p|.
751   }
752   BN_ULONG val[TABLE_SIZE_SMALL][BN_SMALL_MAX_WORDS];
753   OPENSSL_memcpy(val[0], a, num * sizeof(BN_ULONG));
754   if (window > 1) {
755     BN_ULONG d[BN_SMALL_MAX_WORDS];
756     bn_mod_mul_montgomery_small(d, val[0], val[0], num, mont);
757     for (unsigned i = 1; i < 1u << (window - 1); i++) {
758       bn_mod_mul_montgomery_small(val[i], val[i - 1], d, num, mont);
759     }
760   }
761 
762   // |p| is non-zero, so at least one window is non-zero. To save some
763   // multiplications, defer initializing |r| until then.
764   int r_is_one = 1;
765   size_t wstart = bits - 1;  // The top bit of the window.
766   for (;;) {
767     if (!bn_is_bit_set_words(p, num_p, wstart)) {
768       if (!r_is_one) {
769         bn_mod_mul_montgomery_small(r, r, r, num, mont);
770       }
771       if (wstart == 0) {
772         break;
773       }
774       wstart--;
775       continue;
776     }
777 
778     // We now have wstart on a set bit. Find the largest window we can use.
779     unsigned wvalue = 1;
780     unsigned wsize = 0;
781     for (unsigned i = 1; i < window && i <= wstart; i++) {
782       if (bn_is_bit_set_words(p, num_p, wstart - i)) {
783         wvalue <<= (i - wsize);
784         wvalue |= 1;
785         wsize = i;
786       }
787     }
788 
789     // Shift |r| to the end of the window.
790     if (!r_is_one) {
791       for (unsigned i = 0; i < wsize + 1; i++) {
792         bn_mod_mul_montgomery_small(r, r, r, num, mont);
793       }
794     }
795 
796     assert(wvalue & 1);
797     assert(wvalue < (1u << window));
798     if (r_is_one) {
799       OPENSSL_memcpy(r, val[wvalue >> 1], num * sizeof(BN_ULONG));
800     } else {
801       bn_mod_mul_montgomery_small(r, r, val[wvalue >> 1], num, mont);
802     }
803     r_is_one = 0;
804     if (wstart == wsize) {
805       break;
806     }
807     wstart -= wsize + 1;
808   }
809 
810   // |p| is non-zero, so |r_is_one| must be cleared at some point.
811   assert(!r_is_one);
812   OPENSSL_cleanse(val, sizeof(val));
813 }
814 
bn_mod_inverse0_prime_mont_small(BN_ULONG * r,const BN_ULONG * a,size_t num,const BN_MONT_CTX * mont)815 void bn_mod_inverse0_prime_mont_small(BN_ULONG *r, const BN_ULONG *a,
816                                       size_t num, const BN_MONT_CTX *mont) {
817   if (num != (size_t)mont->N.width || num > BN_SMALL_MAX_WORDS) {
818     abort();
819   }
820 
821   // Per Fermat's Little Theorem, a^-1 = a^(p-2) (mod p) for p prime.
822   BN_ULONG p_minus_two[BN_SMALL_MAX_WORDS];
823   const BN_ULONG *p = mont->N.d;
824   OPENSSL_memcpy(p_minus_two, p, num * sizeof(BN_ULONG));
825   if (p_minus_two[0] >= 2) {
826     p_minus_two[0] -= 2;
827   } else {
828     p_minus_two[0] -= 2;
829     for (size_t i = 1; i < num; i++) {
830       if (p_minus_two[i]-- != 0) {
831         break;
832       }
833     }
834   }
835 
836   bn_mod_exp_mont_small(r, a, num, p_minus_two, num, mont);
837 }
838 
copy_to_prebuf(const BIGNUM * b,int top,BN_ULONG * table,int idx,int window)839 static void copy_to_prebuf(const BIGNUM *b, int top, BN_ULONG *table, int idx,
840                            int window) {
841   int ret = bn_copy_words(table + idx * top, top, b);
842   assert(ret);  // |b| is guaranteed to fit.
843   (void)ret;
844 }
845 
copy_from_prebuf(BIGNUM * b,int top,const BN_ULONG * table,int idx,int window)846 static int copy_from_prebuf(BIGNUM *b, int top, const BN_ULONG *table, int idx,
847                             int window) {
848   if (!bn_wexpand(b, top)) {
849     return 0;
850   }
851 
852   OPENSSL_memset(b->d, 0, sizeof(BN_ULONG) * top);
853   const int width = 1 << window;
854   for (int i = 0; i < width; i++, table += top) {
855     // Use a value barrier to prevent Clang from adding a branch when |i != idx|
856     // and making this copy not constant time. Clang is still allowed to learn
857     // that |mask| is constant across the inner loop, so this won't inhibit any
858     // vectorization it might do.
859     BN_ULONG mask = value_barrier_w(constant_time_eq_int(i, idx));
860     for (int j = 0; j < top; j++) {
861       b->d[j] |= table[j] & mask;
862     }
863   }
864 
865   b->width = top;
866   return 1;
867 }
868 
869 // Window sizes optimized for fixed window size modular exponentiation
870 // algorithm (BN_mod_exp_mont_consttime).
871 //
872 // TODO(davidben): These window sizes were originally set for 64-byte cache
873 // lines with a cache-line-dependent constant-time mitigation. They can probably
874 // be revised now that our implementation is no longer cache-time-dependent.
875 #define BN_window_bits_for_ctime_exponent_size(b) \
876   ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
877 #define BN_MAX_MOD_EXP_CTIME_WINDOW (6)
878 
879 // This variant of |BN_mod_exp_mont| uses fixed windows and fixed memory access
880 // patterns to protect secret exponents (cf. the hyper-threading timing attacks
881 // pointed out by Colin Percival,
882 // http://www.daemonology.net/hyperthreading-considered-harmful/)
BN_mod_exp_mont_consttime(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,const BN_MONT_CTX * mont)883 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
884                               const BIGNUM *m, BN_CTX *ctx,
885                               const BN_MONT_CTX *mont) {
886   int i, ret = 0, wvalue;
887   BN_MONT_CTX *new_mont = NULL;
888 
889   unsigned char *powerbuf_free = NULL;
890   size_t powerbuf_len = 0;
891   BN_ULONG *powerbuf = NULL;
892 
893   if (!BN_is_odd(m)) {
894     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
895     return 0;
896   }
897   if (m->neg) {
898     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
899     return 0;
900   }
901   if (a->neg || BN_ucmp(a, m) >= 0) {
902     OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
903     return 0;
904   }
905 
906   // Use all bits stored in |p|, rather than |BN_num_bits|, so we do not leak
907   // whether the top bits are zero.
908   int max_bits = p->width * BN_BITS2;
909   int bits = max_bits;
910   if (bits == 0) {
911     // x**0 mod 1 is still zero.
912     if (BN_abs_is_word(m, 1)) {
913       BN_zero(rr);
914       return 1;
915     }
916     return BN_one(rr);
917   }
918 
919   // Allocate a montgomery context if it was not supplied by the caller.
920   if (mont == NULL) {
921     new_mont = BN_MONT_CTX_new_consttime(m, ctx);
922     if (new_mont == NULL) {
923       goto err;
924     }
925     mont = new_mont;
926   }
927 
928   // Use the width in |mont->N|, rather than the copy in |m|. The assembly
929   // implementation assumes it can use |top| to size R.
930   int top = mont->N.width;
931 
932 #if defined(OPENSSL_BN_ASM_MONT5) || defined(RSAZ_ENABLED)
933   // Share one large stack-allocated buffer between the RSAZ and non-RSAZ code
934   // paths. If we were to use separate static buffers for each then there is
935   // some chance that both large buffers would be allocated on the stack,
936   // causing the stack space requirement to be truly huge (~10KB).
937   alignas(MOD_EXP_CTIME_ALIGN) BN_ULONG storage[MOD_EXP_CTIME_STORAGE_LEN];
938 #endif
939 #if defined(RSAZ_ENABLED)
940   // If the size of the operands allow it, perform the optimized RSAZ
941   // exponentiation. For further information see crypto/fipsmodule/bn/rsaz_exp.c
942   // and accompanying assembly modules.
943   if (a->width == 16 && p->width == 16 && BN_num_bits(m) == 1024 &&
944       rsaz_avx2_preferred()) {
945     if (!bn_wexpand(rr, 16)) {
946       goto err;
947     }
948     RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0],
949                            storage);
950     rr->width = 16;
951     rr->neg = 0;
952     ret = 1;
953     goto err;
954   }
955 #endif
956 
957   // Get the window size to use with size of p.
958   int window = BN_window_bits_for_ctime_exponent_size(bits);
959   assert(window <= BN_MAX_MOD_EXP_CTIME_WINDOW);
960 
961   // Calculating |powerbuf_len| below cannot overflow because of the bound on
962   // Montgomery reduction.
963   assert((size_t)top <= BN_MONTGOMERY_MAX_WORDS);
964   static_assert(
965       BN_MONTGOMERY_MAX_WORDS <=
966           INT_MAX / sizeof(BN_ULONG) / ((1 << BN_MAX_MOD_EXP_CTIME_WINDOW) + 3),
967       "powerbuf_len may overflow");
968 
969 #if defined(OPENSSL_BN_ASM_MONT5)
970   if (window >= 5) {
971     window = 5;  // ~5% improvement for RSA2048 sign, and even for RSA4096
972     // Reserve space for the |mont->N| copy.
973     powerbuf_len += top * sizeof(mont->N.d[0]);
974   }
975 #endif
976 
977   // Allocate a buffer large enough to hold all of the pre-computed
978   // powers of |am|, |am| itself, and |tmp|.
979   int num_powers = 1 << window;
980   powerbuf_len += sizeof(m->d[0]) * top * (num_powers + 2);
981 
982 #if defined(OPENSSL_BN_ASM_MONT5)
983   if (powerbuf_len <= sizeof(storage)) {
984     powerbuf = storage;
985   }
986   // |storage| is more than large enough to handle 1024-bit inputs.
987   assert(powerbuf != NULL || top * BN_BITS2 > 1024);
988 #endif
989   if (powerbuf == NULL) {
990     powerbuf_free = OPENSSL_malloc(powerbuf_len + MOD_EXP_CTIME_ALIGN);
991     if (powerbuf_free == NULL) {
992       goto err;
993     }
994     powerbuf = align_pointer(powerbuf_free, MOD_EXP_CTIME_ALIGN);
995   }
996   OPENSSL_memset(powerbuf, 0, powerbuf_len);
997 
998   // Place |tmp| and |am| right after powers table.
999   BIGNUM tmp, am;
1000   tmp.d = powerbuf + top * num_powers;
1001   am.d = tmp.d + top;
1002   tmp.width = am.width = 0;
1003   tmp.dmax = am.dmax = top;
1004   tmp.neg = am.neg = 0;
1005   tmp.flags = am.flags = BN_FLG_STATIC_DATA;
1006 
1007   if (!bn_one_to_montgomery(&tmp, mont, ctx) ||
1008       !bn_resize_words(&tmp, top)) {
1009     goto err;
1010   }
1011 
1012   // Prepare a^1 in the Montgomery domain.
1013   assert(!a->neg);
1014   assert(BN_ucmp(a, m) < 0);
1015   if (!BN_to_montgomery(&am, a, mont, ctx) ||
1016       !bn_resize_words(&am, top)) {
1017     goto err;
1018   }
1019 
1020 #if defined(OPENSSL_BN_ASM_MONT5)
1021   // This optimization uses ideas from https://eprint.iacr.org/2011/239,
1022   // specifically optimization of cache-timing attack countermeasures,
1023   // pre-computation optimization, and Almost Montgomery Multiplication.
1024   //
1025   // The paper discusses a 4-bit window to optimize 512-bit modular
1026   // exponentiation, used in RSA-1024 with CRT, but RSA-1024 is no longer
1027   // important.
1028   //
1029   // |bn_mul_mont_gather5| and |bn_power5| implement the "almost" reduction
1030   // variant, so the values here may not be fully reduced. They are bounded by R
1031   // (i.e. they fit in |top| words), not |m|. Additionally, we pass these
1032   // "almost" reduced inputs into |bn_mul_mont|, which implements the normal
1033   // reduction variant. Given those inputs, |bn_mul_mont| may not give reduced
1034   // output, but it will still produce "almost" reduced output.
1035   //
1036   // TODO(davidben): Using "almost" reduction complicates analysis of this code,
1037   // and its interaction with other parts of the project. Determine whether this
1038   // is actually necessary for performance.
1039   if (window == 5 && top > 1) {
1040     // Copy |mont->N| to improve cache locality.
1041     BN_ULONG *np = am.d + top;
1042     for (i = 0; i < top; i++) {
1043       np[i] = mont->N.d[i];
1044     }
1045 
1046     // Fill |powerbuf| with the first 32 powers of |am|.
1047     const BN_ULONG *n0 = mont->n0;
1048     bn_scatter5(tmp.d, top, powerbuf, 0);
1049     bn_scatter5(am.d, am.width, powerbuf, 1);
1050     bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
1051     bn_scatter5(tmp.d, top, powerbuf, 2);
1052 
1053     // Square to compute powers of two.
1054     for (i = 4; i < 32; i *= 2) {
1055       bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1056       bn_scatter5(tmp.d, top, powerbuf, i);
1057     }
1058     // Compute odd powers |i| based on |i - 1|, then all powers |i * 2^j|.
1059     for (i = 3; i < 32; i += 2) {
1060       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
1061       bn_scatter5(tmp.d, top, powerbuf, i);
1062       for (int j = 2 * i; j < 32; j *= 2) {
1063         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1064         bn_scatter5(tmp.d, top, powerbuf, j);
1065       }
1066     }
1067 
1068     bits--;
1069     for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {
1070       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1071     }
1072     bn_gather5(tmp.d, top, powerbuf, wvalue);
1073 
1074     // At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
1075     // that has not been read yet.)
1076     assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
1077 
1078     // Scan the exponent one window at a time starting from the most
1079     // significant bits.
1080     if (top & 7) {
1081       while (bits >= 0) {
1082         for (wvalue = 0, i = 0; i < 5; i++, bits--) {
1083           wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1084         }
1085 
1086         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1087         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1088         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1089         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1090         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1091         bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
1092       }
1093     } else {
1094       const uint8_t *p_bytes = (const uint8_t *)p->d;
1095       assert(bits < max_bits);
1096       // |p = 0| has been handled as a special case, so |max_bits| is at least
1097       // one word.
1098       assert(max_bits >= 64);
1099 
1100       // If the first bit to be read lands in the last byte, unroll the first
1101       // iteration to avoid reading past the bounds of |p->d|. (After the first
1102       // iteration, we are guaranteed to be past the last byte.) Note |bits|
1103       // here is the top bit, inclusive.
1104       if (bits - 4 >= max_bits - 8) {
1105         // Read five bits from |bits-4| through |bits|, inclusive.
1106         wvalue = p_bytes[p->width * BN_BYTES - 1];
1107         wvalue >>= (bits - 4) & 7;
1108         wvalue &= 0x1f;
1109         bits -= 5;
1110         bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
1111       }
1112       while (bits >= 0) {
1113         // Read five bits from |bits-4| through |bits|, inclusive.
1114         int first_bit = bits - 4;
1115         uint16_t val;
1116         OPENSSL_memcpy(&val, p_bytes + (first_bit >> 3), sizeof(val));
1117         val >>= first_bit & 7;
1118         val &= 0x1f;
1119         bits -= 5;
1120         bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, val);
1121       }
1122     }
1123     // The result is now in |tmp| in Montgomery form, but it may not be fully
1124     // reduced. This is within bounds for |BN_from_montgomery| (tmp < R <= m*R)
1125     // so it will, when converting from Montgomery form, produce a fully reduced
1126     // result.
1127     //
1128     // This differs from Figure 2 of the paper, which uses AMM(h, 1) to convert
1129     // from Montgomery form with unreduced output, followed by an extra
1130     // reduction step. In the paper's terminology, we replace steps 9 and 10
1131     // with MM(h, 1).
1132   } else
1133 #endif
1134   {
1135     copy_to_prebuf(&tmp, top, powerbuf, 0, window);
1136     copy_to_prebuf(&am, top, powerbuf, 1, window);
1137 
1138     // If the window size is greater than 1, then calculate
1139     // val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
1140     // (even powers could instead be computed as (a^(i/2))^2
1141     // to use the slight performance advantage of sqr over mul).
1142     if (window > 1) {
1143       if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) {
1144         goto err;
1145       }
1146 
1147       copy_to_prebuf(&tmp, top, powerbuf, 2, window);
1148 
1149       for (i = 3; i < num_powers; i++) {
1150         // Calculate a^i = a^(i-1) * a
1151         if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) {
1152           goto err;
1153         }
1154 
1155         copy_to_prebuf(&tmp, top, powerbuf, i, window);
1156       }
1157     }
1158 
1159     bits--;
1160     for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
1161       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1162     }
1163     if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) {
1164       goto err;
1165     }
1166 
1167     // Scan the exponent one window at a time starting from the most
1168     // significant bits.
1169     while (bits >= 0) {
1170       wvalue = 0;  // The 'value' of the window
1171 
1172       // Scan the window, squaring the result as we go
1173       for (i = 0; i < window; i++, bits--) {
1174         if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) {
1175           goto err;
1176         }
1177         wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1178       }
1179 
1180       // Fetch the appropriate pre-computed value from the pre-buf
1181       if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
1182         goto err;
1183       }
1184 
1185       // Multiply the result into the intermediate result
1186       if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) {
1187         goto err;
1188       }
1189     }
1190   }
1191 
1192   // Convert the final result from Montgomery to standard format. If we used the
1193   // |OPENSSL_BN_ASM_MONT5| codepath, |tmp| may not be fully reduced. It is only
1194   // bounded by R rather than |m|. However, that is still within bounds for
1195   // |BN_from_montgomery|, which implements full Montgomery reduction, not
1196   // "almost" Montgomery reduction.
1197   if (!BN_from_montgomery(rr, &tmp, mont, ctx)) {
1198     goto err;
1199   }
1200   ret = 1;
1201 
1202 err:
1203   BN_MONT_CTX_free(new_mont);
1204   if (powerbuf != NULL && powerbuf_free == NULL) {
1205     OPENSSL_cleanse(powerbuf, powerbuf_len);
1206   }
1207   OPENSSL_free(powerbuf_free);
1208   return ret;
1209 }
1210 
BN_mod_exp_mont_word(BIGNUM * rr,BN_ULONG a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,const BN_MONT_CTX * mont)1211 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
1212                          const BIGNUM *m, BN_CTX *ctx,
1213                          const BN_MONT_CTX *mont) {
1214   BIGNUM a_bignum;
1215   BN_init(&a_bignum);
1216 
1217   int ret = 0;
1218 
1219   // BN_mod_exp_mont requires reduced inputs.
1220   if (bn_minimal_width(m) == 1) {
1221     a %= m->d[0];
1222   }
1223 
1224   if (!BN_set_word(&a_bignum, a)) {
1225     OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
1226     goto err;
1227   }
1228 
1229   ret = BN_mod_exp_mont(rr, &a_bignum, p, m, ctx, mont);
1230 
1231 err:
1232   BN_free(&a_bignum);
1233 
1234   return ret;
1235 }
1236 
1237 #define TABLE_SIZE 32
1238 
BN_mod_exp2_mont(BIGNUM * rr,const BIGNUM * a1,const BIGNUM * p1,const BIGNUM * a2,const BIGNUM * p2,const BIGNUM * m,BN_CTX * ctx,const BN_MONT_CTX * mont)1239 int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
1240                      const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
1241                      BN_CTX *ctx, const BN_MONT_CTX *mont) {
1242   BIGNUM tmp;
1243   BN_init(&tmp);
1244 
1245   int ret = 0;
1246   BN_MONT_CTX *new_mont = NULL;
1247 
1248   // Allocate a montgomery context if it was not supplied by the caller.
1249   if (mont == NULL) {
1250     new_mont = BN_MONT_CTX_new_for_modulus(m, ctx);
1251     if (new_mont == NULL) {
1252       goto err;
1253     }
1254     mont = new_mont;
1255   }
1256 
1257   // BN_mod_mul_montgomery removes one Montgomery factor, so passing one
1258   // Montgomery-encoded and one non-Montgomery-encoded value gives a
1259   // non-Montgomery-encoded result.
1260   if (!BN_mod_exp_mont(rr, a1, p1, m, ctx, mont) ||
1261       !BN_mod_exp_mont(&tmp, a2, p2, m, ctx, mont) ||
1262       !BN_to_montgomery(rr, rr, mont, ctx) ||
1263       !BN_mod_mul_montgomery(rr, rr, &tmp, mont, ctx)) {
1264     goto err;
1265   }
1266 
1267   ret = 1;
1268 
1269 err:
1270   BN_MONT_CTX_free(new_mont);
1271   BN_free(&tmp);
1272 
1273   return ret;
1274 }
1275