• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* crypto/bn/bn_exp.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 /* ====================================================================
59  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  *
65  * 1. Redistributions of source code must retain the above copyright
66  *    notice, this list of conditions and the following disclaimer.
67  *
68  * 2. Redistributions in binary form must reproduce the above copyright
69  *    notice, this list of conditions and the following disclaimer in
70  *    the documentation and/or other materials provided with the
71  *    distribution.
72  *
73  * 3. All advertising materials mentioning features or use of this
74  *    software must display the following acknowledgment:
75  *    "This product includes software developed by the OpenSSL Project
76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77  *
78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79  *    endorse or promote products derived from this software without
80  *    prior written permission. For written permission, please contact
81  *    openssl-core@openssl.org.
82  *
83  * 5. Products derived from this software may not be called "OpenSSL"
84  *    nor may "OpenSSL" appear in their names without prior written
85  *    permission of the OpenSSL Project.
86  *
87  * 6. Redistributions of any form whatsoever must retain the following
88  *    acknowledgment:
89  *    "This product includes software developed by the OpenSSL Project
90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103  * OF THE POSSIBILITY OF SUCH DAMAGE.
104  * ====================================================================
105  *
106  * This product includes cryptographic software written by Eric Young
107  * (eay@cryptsoft.com).  This product includes software written by Tim
108  * Hudson (tjh@cryptsoft.com).
109  *
110  */
111 
112 
113 #include "cryptlib.h"
114 #include "bn_lcl.h"
115 
116 /* maximum precomputation table size for *variable* sliding windows */
117 #define TABLE_SIZE	32
118 
119 /* this one works - simple but works */
BN_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,BN_CTX * ctx)120 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
121 	{
122 	int i,bits,ret=0;
123 	BIGNUM *v,*rr;
124 
125 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
126 		{
127 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
128 		BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
129 		return -1;
130 		}
131 
132 	BN_CTX_start(ctx);
133 	if ((r == a) || (r == p))
134 		rr = BN_CTX_get(ctx);
135 	else
136 		rr = r;
137 	v = BN_CTX_get(ctx);
138 	if (rr == NULL || v == NULL) goto err;
139 
140 	if (BN_copy(v,a) == NULL) goto err;
141 	bits=BN_num_bits(p);
142 
143 	if (BN_is_odd(p))
144 		{ if (BN_copy(rr,a) == NULL) goto err; }
145 	else	{ if (!BN_one(rr)) goto err; }
146 
147 	for (i=1; i<bits; i++)
148 		{
149 		if (!BN_sqr(v,v,ctx)) goto err;
150 		if (BN_is_bit_set(p,i))
151 			{
152 			if (!BN_mul(rr,rr,v,ctx)) goto err;
153 			}
154 		}
155 	ret=1;
156 err:
157 	if (r != rr) BN_copy(r,rr);
158 	BN_CTX_end(ctx);
159 	bn_check_top(r);
160 	return(ret);
161 	}
162 
163 
BN_mod_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)164 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
165 	       BN_CTX *ctx)
166 	{
167 	int ret;
168 
169 	bn_check_top(a);
170 	bn_check_top(p);
171 	bn_check_top(m);
172 
173 	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
174 	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
175 	 * exponentiation for the odd part), using appropriate exponent
176 	 * reductions, and combine the results using the CRT.
177 	 *
178 	 * For now, we use Montgomery only if the modulus is odd; otherwise,
179 	 * exponentiation using the reciprocal-based quick remaindering
180 	 * algorithm is used.
181 	 *
182 	 * (Timing obtained with expspeed.c [computations  a^p mod m
183 	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
184 	 * 4096, 8192 bits], compared to the running time of the
185 	 * standard algorithm:
186 	 *
187 	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
188          *                     55 .. 77 %  [UltraSparc processor, but
189 	 *                                  debug-solaris-sparcv8-gcc conf.]
190 	 *
191 	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
192 	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
193 	 *
194 	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
195 	 * at 2048 and more bits, but at 512 and 1024 bits, it was
196 	 * slower even than the standard algorithm!
197 	 *
198 	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
199 	 * should be obtained when the new Montgomery reduction code
200 	 * has been integrated into OpenSSL.)
201 	 */
202 
203 #define MONT_MUL_MOD
204 #define MONT_EXP_WORD
205 #define RECP_MUL_MOD
206 
207 #ifdef MONT_MUL_MOD
208 	/* I have finally been able to take out this pre-condition of
209 	 * the top bit being set.  It was caused by an error in BN_div
210 	 * with negatives.  There was also another problem when for a^b%m
211 	 * a >= m.  eay 07-May-97 */
212 /*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
213 
214 	if (BN_is_odd(m))
215 		{
216 #  ifdef MONT_EXP_WORD
217 		if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
218 			{
219 			BN_ULONG A = a->d[0];
220 			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
221 			}
222 		else
223 #  endif
224 			ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
225 		}
226 	else
227 #endif
228 #ifdef RECP_MUL_MOD
229 		{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
230 #else
231 		{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
232 #endif
233 
234 	bn_check_top(r);
235 	return(ret);
236 	}
237 
238 
BN_mod_exp_recp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)239 int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
240 		    const BIGNUM *m, BN_CTX *ctx)
241 	{
242 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
243 	int start=1;
244 	BIGNUM *aa;
245 	/* Table of variables obtained from 'ctx' */
246 	BIGNUM *val[TABLE_SIZE];
247 	BN_RECP_CTX recp;
248 
249 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
250 		{
251 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
252 		BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
253 		return -1;
254 		}
255 
256 	bits=BN_num_bits(p);
257 
258 	if (bits == 0)
259 		{
260 		ret = BN_one(r);
261 		return ret;
262 		}
263 
264 	BN_CTX_start(ctx);
265 	aa = BN_CTX_get(ctx);
266 	val[0] = BN_CTX_get(ctx);
267 	if(!aa || !val[0]) goto err;
268 
269 	BN_RECP_CTX_init(&recp);
270 	if (m->neg)
271 		{
272 		/* ignore sign of 'm' */
273 		if (!BN_copy(aa, m)) goto err;
274 		aa->neg = 0;
275 		if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
276 		}
277 	else
278 		{
279 		if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
280 		}
281 
282 	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
283 	if (BN_is_zero(val[0]))
284 		{
285 		BN_zero(r);
286 		ret = 1;
287 		goto err;
288 		}
289 
290 	window = BN_window_bits_for_exponent_size(bits);
291 	if (window > 1)
292 		{
293 		if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
294 			goto err;				/* 2 */
295 		j=1<<(window-1);
296 		for (i=1; i<j; i++)
297 			{
298 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
299 					!BN_mod_mul_reciprocal(val[i],val[i-1],
300 						aa,&recp,ctx))
301 				goto err;
302 			}
303 		}
304 
305 	start=1;	/* This is used to avoid multiplication etc
306 			 * when there is only the value '1' in the
307 			 * buffer. */
308 	wvalue=0;	/* The 'value' of the window */
309 	wstart=bits-1;	/* The top bit of the window */
310 	wend=0;		/* The bottom bit of the window */
311 
312 	if (!BN_one(r)) goto err;
313 
314 	for (;;)
315 		{
316 		if (BN_is_bit_set(p,wstart) == 0)
317 			{
318 			if (!start)
319 				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
320 				goto err;
321 			if (wstart == 0) break;
322 			wstart--;
323 			continue;
324 			}
325 		/* We now have wstart on a 'set' bit, we now need to work out
326 		 * how bit a window to do.  To do this we need to scan
327 		 * forward until the last set bit before the end of the
328 		 * window */
329 		j=wstart;
330 		wvalue=1;
331 		wend=0;
332 		for (i=1; i<window; i++)
333 			{
334 			if (wstart-i < 0) break;
335 			if (BN_is_bit_set(p,wstart-i))
336 				{
337 				wvalue<<=(i-wend);
338 				wvalue|=1;
339 				wend=i;
340 				}
341 			}
342 
343 		/* wend is the size of the current window */
344 		j=wend+1;
345 		/* add the 'bytes above' */
346 		if (!start)
347 			for (i=0; i<j; i++)
348 				{
349 				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
350 					goto err;
351 				}
352 
353 		/* wvalue will be an odd number < 2^window */
354 		if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
355 			goto err;
356 
357 		/* move the 'window' down further */
358 		wstart-=wend+1;
359 		wvalue=0;
360 		start=0;
361 		if (wstart < 0) break;
362 		}
363 	ret=1;
364 err:
365 	BN_CTX_end(ctx);
366 	BN_RECP_CTX_free(&recp);
367 	bn_check_top(r);
368 	return(ret);
369 	}
370 
371 
BN_mod_exp_mont(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)372 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
373 		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
374 	{
375 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
376 	int start=1;
377 	BIGNUM *d,*r;
378 	const BIGNUM *aa;
379 	/* Table of variables obtained from 'ctx' */
380 	BIGNUM *val[TABLE_SIZE];
381 	BN_MONT_CTX *mont=NULL;
382 
383 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
384 		{
385 		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
386 		}
387 
388 	bn_check_top(a);
389 	bn_check_top(p);
390 	bn_check_top(m);
391 
392 	if (!BN_is_odd(m))
393 		{
394 		BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
395 		return(0);
396 		}
397 	bits=BN_num_bits(p);
398 	if (bits == 0)
399 		{
400 		ret = BN_one(rr);
401 		return ret;
402 		}
403 
404 	BN_CTX_start(ctx);
405 	d = BN_CTX_get(ctx);
406 	r = BN_CTX_get(ctx);
407 	val[0] = BN_CTX_get(ctx);
408 	if (!d || !r || !val[0]) goto err;
409 
410 	/* If this is not done, things will break in the montgomery
411 	 * part */
412 
413 	if (in_mont != NULL)
414 		mont=in_mont;
415 	else
416 		{
417 		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
418 		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
419 		}
420 
421 	if (a->neg || BN_ucmp(a,m) >= 0)
422 		{
423 		if (!BN_nnmod(val[0],a,m,ctx))
424 			goto err;
425 		aa= val[0];
426 		}
427 	else
428 		aa=a;
429 	if (BN_is_zero(aa))
430 		{
431 		BN_zero(rr);
432 		ret = 1;
433 		goto err;
434 		}
435 	if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
436 
437 	window = BN_window_bits_for_exponent_size(bits);
438 	if (window > 1)
439 		{
440 		if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
441 		j=1<<(window-1);
442 		for (i=1; i<j; i++)
443 			{
444 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
445 					!BN_mod_mul_montgomery(val[i],val[i-1],
446 						d,mont,ctx))
447 				goto err;
448 			}
449 		}
450 
451 	start=1;	/* This is used to avoid multiplication etc
452 			 * when there is only the value '1' in the
453 			 * buffer. */
454 	wvalue=0;	/* The 'value' of the window */
455 	wstart=bits-1;	/* The top bit of the window */
456 	wend=0;		/* The bottom bit of the window */
457 
458 	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
459 	for (;;)
460 		{
461 		if (BN_is_bit_set(p,wstart) == 0)
462 			{
463 			if (!start)
464 				{
465 				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
466 				goto err;
467 				}
468 			if (wstart == 0) break;
469 			wstart--;
470 			continue;
471 			}
472 		/* We now have wstart on a 'set' bit, we now need to work out
473 		 * how bit a window to do.  To do this we need to scan
474 		 * forward until the last set bit before the end of the
475 		 * window */
476 		j=wstart;
477 		wvalue=1;
478 		wend=0;
479 		for (i=1; i<window; i++)
480 			{
481 			if (wstart-i < 0) break;
482 			if (BN_is_bit_set(p,wstart-i))
483 				{
484 				wvalue<<=(i-wend);
485 				wvalue|=1;
486 				wend=i;
487 				}
488 			}
489 
490 		/* wend is the size of the current window */
491 		j=wend+1;
492 		/* add the 'bytes above' */
493 		if (!start)
494 			for (i=0; i<j; i++)
495 				{
496 				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
497 					goto err;
498 				}
499 
500 		/* wvalue will be an odd number < 2^window */
501 		if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
502 			goto err;
503 
504 		/* move the 'window' down further */
505 		wstart-=wend+1;
506 		wvalue=0;
507 		start=0;
508 		if (wstart < 0) break;
509 		}
510 	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
511 	ret=1;
512 err:
513 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
514 	BN_CTX_end(ctx);
515 	bn_check_top(rr);
516 	return(ret);
517 	}
518 
519 
520 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
521  * so that accessing any of these table values shows the same access pattern as far
522  * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
523  * from/to that table. */
524 
MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM * b,int top,unsigned char * buf,int idx,int width)525 static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
526 	{
527 	size_t i, j;
528 
529 	if (bn_wexpand(b, top) == NULL)
530 		return 0;
531 	while (b->top < top)
532 		{
533 		b->d[b->top++] = 0;
534 		}
535 
536 	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
537 		{
538 		buf[j] = ((unsigned char*)b->d)[i];
539 		}
540 
541 	bn_correct_top(b);
542 	return 1;
543 	}
544 
MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM * b,int top,unsigned char * buf,int idx,int width)545 static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
546 	{
547 	size_t i, j;
548 
549 	if (bn_wexpand(b, top) == NULL)
550 		return 0;
551 
552 	for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
553 		{
554 		((unsigned char*)b->d)[i] = buf[j];
555 		}
556 
557 	b->top = top;
558 	bn_correct_top(b);
559 	return 1;
560 	}
561 
562 /* Given a pointer value, compute the next address that is a cache line multiple. */
563 #define MOD_EXP_CTIME_ALIGN(x_) \
564 	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
565 
566 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
567  * precomputation memory layout to limit data-dependency to a minimum
568  * to protect secret exponents (cf. the hyper-threading timing attacks
569  * pointed out by Colin Percival,
570  * http://www.daemonology.net/hyperthreading-considered-harmful/)
571  */
BN_mod_exp_mont_consttime(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)572 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
573 		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
574 	{
575 	int i,bits,ret=0,idx,window,wvalue;
576 	int top;
577  	BIGNUM *r;
578 	const BIGNUM *aa;
579 	BN_MONT_CTX *mont=NULL;
580 
581 	int numPowers;
582 	unsigned char *powerbufFree=NULL;
583 	int powerbufLen = 0;
584 	unsigned char *powerbuf=NULL;
585 	BIGNUM *computeTemp=NULL, *am=NULL;
586 
587 	bn_check_top(a);
588 	bn_check_top(p);
589 	bn_check_top(m);
590 
591 	top = m->top;
592 
593 	if (!(m->d[0] & 1))
594 		{
595 		BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
596 		return(0);
597 		}
598 	bits=BN_num_bits(p);
599 	if (bits == 0)
600 		{
601 		ret = BN_one(rr);
602 		return ret;
603 		}
604 
605  	/* Initialize BIGNUM context and allocate intermediate result */
606 	BN_CTX_start(ctx);
607 	r = BN_CTX_get(ctx);
608 	if (r == NULL) goto err;
609 
610 	/* Allocate a montgomery context if it was not supplied by the caller.
611 	 * If this is not done, things will break in the montgomery part.
612  	 */
613 	if (in_mont != NULL)
614 		mont=in_mont;
615 	else
616 		{
617 		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
618 		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
619 		}
620 
621 	/* Get the window size to use with size of p. */
622 	window = BN_window_bits_for_ctime_exponent_size(bits);
623 
624 	/* Allocate a buffer large enough to hold all of the pre-computed
625 	 * powers of a.
626 	 */
627 	numPowers = 1 << window;
628 	powerbufLen = sizeof(m->d[0])*top*numPowers;
629 	if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
630 		goto err;
631 
632 	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
633 	memset(powerbuf, 0, powerbufLen);
634 
635  	/* Initialize the intermediate result. Do this early to save double conversion,
636 	 * once each for a^0 and intermediate result.
637 	 */
638  	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
639 	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
640 
641 	/* Initialize computeTemp as a^1 with montgomery precalcs */
642 	computeTemp = BN_CTX_get(ctx);
643 	am = BN_CTX_get(ctx);
644 	if (computeTemp==NULL || am==NULL) goto err;
645 
646 	if (a->neg || BN_ucmp(a,m) >= 0)
647 		{
648 		if (!BN_mod(am,a,m,ctx))
649 			goto err;
650 		aa= am;
651 		}
652 	else
653 		aa=a;
654 	if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
655 	if (!BN_copy(computeTemp, am)) goto err;
656 	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
657 
658 	/* If the window size is greater than 1, then calculate
659 	 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
660 	 * (even powers could instead be computed as (a^(i/2))^2
661 	 * to use the slight performance advantage of sqr over mul).
662 	 */
663 	if (window > 1)
664 		{
665 		for (i=2; i<numPowers; i++)
666 			{
667 			/* Calculate a^i = a^(i-1) * a */
668 			if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
669 				goto err;
670 			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
671 			}
672 		}
673 
674  	/* Adjust the number of bits up to a multiple of the window size.
675  	 * If the exponent length is not a multiple of the window size, then
676  	 * this pads the most significant bits with zeros to normalize the
677  	 * scanning loop to there's no special cases.
678  	 *
679  	 * * NOTE: Making the window size a power of two less than the native
680 	 * * word size ensures that the padded bits won't go past the last
681  	 * * word in the internal BIGNUM structure. Going past the end will
682  	 * * still produce the correct result, but causes a different branch
683  	 * * to be taken in the BN_is_bit_set function.
684  	 */
685  	bits = ((bits+window-1)/window)*window;
686  	idx=bits-1;	/* The top bit of the window */
687 
688  	/* Scan the exponent one window at a time starting from the most
689  	 * significant bits.
690  	 */
691  	while (idx >= 0)
692   		{
693  		wvalue=0; /* The 'value' of the window */
694 
695  		/* Scan the window, squaring the result as we go */
696  		for (i=0; i<window; i++,idx--)
697  			{
698 			if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))	goto err;
699 			wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
700   			}
701 
702 		/* Fetch the appropriate pre-computed value from the pre-buf */
703 		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
704 
705  		/* Multiply the result into the intermediate result */
706  		if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
707   		}
708 
709  	/* Convert the final result from montgomery to standard format */
710 	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
711 	ret=1;
712 err:
713 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
714 	if (powerbuf!=NULL)
715 		{
716 		OPENSSL_cleanse(powerbuf,powerbufLen);
717 		OPENSSL_free(powerbufFree);
718 		}
719  	if (am!=NULL) BN_clear(am);
720  	if (computeTemp!=NULL) BN_clear(computeTemp);
721 	BN_CTX_end(ctx);
722 	return(ret);
723 	}
724 
BN_mod_exp_mont_word(BIGNUM * rr,BN_ULONG a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)725 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
726                          const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
727 	{
728 	BN_MONT_CTX *mont = NULL;
729 	int b, bits, ret=0;
730 	int r_is_one;
731 	BN_ULONG w, next_w;
732 	BIGNUM *d, *r, *t;
733 	BIGNUM *swap_tmp;
734 #define BN_MOD_MUL_WORD(r, w, m) \
735 		(BN_mul_word(r, (w)) && \
736 		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
737 			(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
738 		/* BN_MOD_MUL_WORD is only used with 'w' large,
739 		 * so the BN_ucmp test is probably more overhead
740 		 * than always using BN_mod (which uses BN_copy if
741 		 * a similar test returns true). */
742 		/* We can use BN_mod and do not need BN_nnmod because our
743 		 * accumulator is never negative (the result of BN_mod does
744 		 * not depend on the sign of the modulus).
745 		 */
746 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
747 		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
748 
749 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
750 		{
751 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
752 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
753 		return -1;
754 		}
755 
756 	bn_check_top(p);
757 	bn_check_top(m);
758 
759 	if (!BN_is_odd(m))
760 		{
761 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
762 		return(0);
763 		}
764 	if (m->top == 1)
765 		a %= m->d[0]; /* make sure that 'a' is reduced */
766 
767 	bits = BN_num_bits(p);
768 	if (bits == 0)
769 		{
770 		ret = BN_one(rr);
771 		return ret;
772 		}
773 	if (a == 0)
774 		{
775 		BN_zero(rr);
776 		ret = 1;
777 		return ret;
778 		}
779 
780 	BN_CTX_start(ctx);
781 	d = BN_CTX_get(ctx);
782 	r = BN_CTX_get(ctx);
783 	t = BN_CTX_get(ctx);
784 	if (d == NULL || r == NULL || t == NULL) goto err;
785 
786 	if (in_mont != NULL)
787 		mont=in_mont;
788 	else
789 		{
790 		if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
791 		if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
792 		}
793 
794 	r_is_one = 1; /* except for Montgomery factor */
795 
796 	/* bits-1 >= 0 */
797 
798 	/* The result is accumulated in the product r*w. */
799 	w = a; /* bit 'bits-1' of 'p' is always set */
800 	for (b = bits-2; b >= 0; b--)
801 		{
802 		/* First, square r*w. */
803 		next_w = w*w;
804 		if ((next_w/w) != w) /* overflow */
805 			{
806 			if (r_is_one)
807 				{
808 				if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
809 				r_is_one = 0;
810 				}
811 			else
812 				{
813 				if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
814 				}
815 			next_w = 1;
816 			}
817 		w = next_w;
818 		if (!r_is_one)
819 			{
820 			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
821 			}
822 
823 		/* Second, multiply r*w by 'a' if exponent bit is set. */
824 		if (BN_is_bit_set(p, b))
825 			{
826 			next_w = w*a;
827 			if ((next_w/a) != w) /* overflow */
828 				{
829 				if (r_is_one)
830 					{
831 					if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
832 					r_is_one = 0;
833 					}
834 				else
835 					{
836 					if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
837 					}
838 				next_w = a;
839 				}
840 			w = next_w;
841 			}
842 		}
843 
844 	/* Finally, set r:=r*w. */
845 	if (w != 1)
846 		{
847 		if (r_is_one)
848 			{
849 			if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
850 			r_is_one = 0;
851 			}
852 		else
853 			{
854 			if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
855 			}
856 		}
857 
858 	if (r_is_one) /* can happen only if a == 1*/
859 		{
860 		if (!BN_one(rr)) goto err;
861 		}
862 	else
863 		{
864 		if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
865 		}
866 	ret = 1;
867 err:
868 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
869 	BN_CTX_end(ctx);
870 	bn_check_top(rr);
871 	return(ret);
872 	}
873 
874 
875 /* The old fallback, simple version :-) */
BN_mod_exp_simple(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)876 int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
877 		const BIGNUM *m, BN_CTX *ctx)
878 	{
879 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
880 	int start=1;
881 	BIGNUM *d;
882 	/* Table of variables obtained from 'ctx' */
883 	BIGNUM *val[TABLE_SIZE];
884 
885 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
886 		{
887 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
888 		BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
889 		return -1;
890 		}
891 
892 	bits=BN_num_bits(p);
893 
894 	if (bits == 0)
895 		{
896 		ret = BN_one(r);
897 		return ret;
898 		}
899 
900 	BN_CTX_start(ctx);
901 	d = BN_CTX_get(ctx);
902 	val[0] = BN_CTX_get(ctx);
903 	if(!d || !val[0]) goto err;
904 
905 	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
906 	if (BN_is_zero(val[0]))
907 		{
908 		BN_zero(r);
909 		ret = 1;
910 		goto err;
911 		}
912 
913 	window = BN_window_bits_for_exponent_size(bits);
914 	if (window > 1)
915 		{
916 		if (!BN_mod_mul(d,val[0],val[0],m,ctx))
917 			goto err;				/* 2 */
918 		j=1<<(window-1);
919 		for (i=1; i<j; i++)
920 			{
921 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
922 					!BN_mod_mul(val[i],val[i-1],d,m,ctx))
923 				goto err;
924 			}
925 		}
926 
927 	start=1;	/* This is used to avoid multiplication etc
928 			 * when there is only the value '1' in the
929 			 * buffer. */
930 	wvalue=0;	/* The 'value' of the window */
931 	wstart=bits-1;	/* The top bit of the window */
932 	wend=0;		/* The bottom bit of the window */
933 
934 	if (!BN_one(r)) goto err;
935 
936 	for (;;)
937 		{
938 		if (BN_is_bit_set(p,wstart) == 0)
939 			{
940 			if (!start)
941 				if (!BN_mod_mul(r,r,r,m,ctx))
942 				goto err;
943 			if (wstart == 0) break;
944 			wstart--;
945 			continue;
946 			}
947 		/* We now have wstart on a 'set' bit, we now need to work out
948 		 * how bit a window to do.  To do this we need to scan
949 		 * forward until the last set bit before the end of the
950 		 * window */
951 		j=wstart;
952 		wvalue=1;
953 		wend=0;
954 		for (i=1; i<window; i++)
955 			{
956 			if (wstart-i < 0) break;
957 			if (BN_is_bit_set(p,wstart-i))
958 				{
959 				wvalue<<=(i-wend);
960 				wvalue|=1;
961 				wend=i;
962 				}
963 			}
964 
965 		/* wend is the size of the current window */
966 		j=wend+1;
967 		/* add the 'bytes above' */
968 		if (!start)
969 			for (i=0; i<j; i++)
970 				{
971 				if (!BN_mod_mul(r,r,r,m,ctx))
972 					goto err;
973 				}
974 
975 		/* wvalue will be an odd number < 2^window */
976 		if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
977 			goto err;
978 
979 		/* move the 'window' down further */
980 		wstart-=wend+1;
981 		wvalue=0;
982 		start=0;
983 		if (wstart < 0) break;
984 		}
985 	ret=1;
986 err:
987 	BN_CTX_end(ctx);
988 	bn_check_top(r);
989 	return(ret);
990 	}
991 
992