• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10#include <openssl/bn.h>
11
12#include <assert.h>
13#include <limits.h>
14
15#include <openssl/err.h>
16
17#include "internal.h"
18
19
20// bn_div_words divides a double-width |h|,|l| by |d| and returns the result,
21// which must fit in a |BN_ULONG|.
22static inline BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
23  BN_ULONG dh, dl, q, ret = 0, th, tl, t;
24  int i, count = 2;
25
26  if (d == 0) {
27    return BN_MASK2;
28  }
29
30  i = BN_num_bits_word(d);
31  assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
32
33  i = BN_BITS2 - i;
34  if (h >= d) {
35    h -= d;
36  }
37
38  if (i) {
39    d <<= i;
40    h = (h << i) | (l >> (BN_BITS2 - i));
41    l <<= i;
42  }
43  dh = (d & BN_MASK2h) >> BN_BITS4;
44  dl = (d & BN_MASK2l);
45  for (;;) {
46    if ((h >> BN_BITS4) == dh) {
47      q = BN_MASK2l;
48    } else {
49      q = h / dh;
50    }
51
52    th = q * dh;
53    tl = dl * q;
54    for (;;) {
55      t = h - th;
56      if ((t & BN_MASK2h) ||
57          ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4)))) {
58        break;
59      }
60      q--;
61      th -= dh;
62      tl -= dl;
63    }
64    t = (tl >> BN_BITS4);
65    tl = (tl << BN_BITS4) & BN_MASK2h;
66    th += t;
67
68    if (l < tl) {
69      th++;
70    }
71    l -= tl;
72    if (h < th) {
73      h += d;
74      q--;
75    }
76    h -= th;
77
78    if (--count == 0) {
79      break;
80    }
81
82    ret = q << BN_BITS4;
83    h = (h << BN_BITS4) | (l >> BN_BITS4);
84    l = (l & BN_MASK2l) << BN_BITS4;
85  }
86
87  ret |= q;
88  return ret;
89}
90
91static inline void bn_div_rem_words(BN_ULONG *quotient_out, BN_ULONG *rem_out,
92                                    BN_ULONG n0, BN_ULONG n1, BN_ULONG d0) {
93  // GCC and Clang generate function calls to |__udivdi3| and |__umoddi3| when
94  // the |BN_ULLONG|-based C code is used.
95  //
96  // GCC bugs:
97  //   * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
98  //   * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721
99  //   * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54183
100  //   * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58897
101  //   * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65668
102  //
103  // Clang bugs:
104  //   * https://github.com/llvm/llvm-project/issues/6769
105  //   * https://github.com/llvm/llvm-project/issues/12790
106  //
107  // These is specific to x86 and x86_64; Arm and RISC-V do not have double-wide
108  // division instructions.
109#if defined(BN_CAN_USE_INLINE_ASM) && defined(OPENSSL_X86)
110  __asm__ volatile("divl %4"
111                   : "=a"(*quotient_out), "=d"(*rem_out)
112                   : "a"(n1), "d"(n0), "rm"(d0)
113                   : "cc");
114#elif defined(BN_CAN_USE_INLINE_ASM) && defined(OPENSSL_X86_64)
115  __asm__ volatile("divq %4"
116                   : "=a"(*quotient_out), "=d"(*rem_out)
117                   : "a"(n1), "d"(n0), "rm"(d0)
118                   : "cc");
119#else
120#if defined(BN_CAN_DIVIDE_ULLONG)
121  BN_ULLONG n = (((BN_ULLONG)n0) << BN_BITS2) | n1;
122  *quotient_out = (BN_ULONG)(n / d0);
123#else
124  *quotient_out = bn_div_words(n0, n1, d0);
125#endif
126  *rem_out = n1 - (*quotient_out * d0);
127#endif
128}
129
130int BN_div(BIGNUM *quotient, BIGNUM *rem, const BIGNUM *numerator,
131           const BIGNUM *divisor, BN_CTX *ctx) {
132  // This function implements long division, per Knuth, The Art of Computer
133  // Programming, Volume 2, Chapter 4.3.1, Algorithm D. This algorithm only
134  // divides non-negative integers, but we round towards zero, so we divide
135  // absolute values and adjust the signs separately.
136  //
137  // Inputs to this function are assumed public and may be leaked by timing and
138  // cache side channels. Division with secret inputs should use other
139  // implementation strategies such as Montgomery reduction.
140  if (BN_is_zero(divisor)) {
141    OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
142    return 0;
143  }
144
145  BN_CTX_start(ctx);
146  BIGNUM *tmp = BN_CTX_get(ctx);
147  BIGNUM *snum = BN_CTX_get(ctx);
148  BIGNUM *sdiv = BN_CTX_get(ctx);
149  BIGNUM *res = quotient == NULL ? BN_CTX_get(ctx) : quotient;
150  int norm_shift, num_n, loop, div_n;
151  BN_ULONG d0, d1;
152  if (tmp == NULL || snum == NULL || sdiv == NULL || res == NULL) {
153    goto err;
154  }
155
156  // Knuth step D1: Normalise the numbers such that the divisor's MSB is set.
157  // This ensures, in Knuth's terminology, that v1 >= b/2, needed for the
158  // quotient estimation step.
159  norm_shift = BN_BITS2 - (BN_num_bits(divisor) % BN_BITS2);
160  if (!BN_lshift(sdiv, divisor, norm_shift) ||
161      !BN_lshift(snum, numerator, norm_shift)) {
162    goto err;
163  }
164
165  // This algorithm relies on |sdiv| being minimal width. We do not use this
166  // function on secret inputs, so leaking this is fine. Also minimize |snum| to
167  // avoid looping on leading zeros, as we're not trying to be leak-free.
168  bn_set_minimal_width(sdiv);
169  bn_set_minimal_width(snum);
170  div_n = sdiv->width;
171  d0 = sdiv->d[div_n - 1];
172  d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
173  assert(d0 & (((BN_ULONG)1) << (BN_BITS2 - 1)));
174
175  // Extend |snum| with zeros to satisfy the long division invariants:
176  // - |snum| must have at least |div_n| + 1 words.
177  // - |snum|'s most significant word must be zero to guarantee the first loop
178  //   iteration works with a prefix greater than |sdiv|. (This is the extra u0
179  //   digit in Knuth step D1.)
180  num_n = snum->width <= div_n ? div_n + 1 : snum->width + 1;
181  if (!bn_resize_words(snum, num_n)) {
182    goto err;
183  }
184
185  // Knuth step D2: The quotient's width is the difference between numerator and
186  // denominator. Also set up its sign and size a temporary for the loop.
187  loop = num_n - div_n;
188  res->neg = snum->neg ^ sdiv->neg;
189  if (!bn_wexpand(res, loop) ||  //
190      !bn_wexpand(tmp, div_n + 1)) {
191    goto err;
192  }
193  res->width = loop;
194
195  // Knuth steps D2 through D7: Compute the quotient with a word-by-word long
196  // division. Note that Knuth indexes words from most to least significant, so
197  // our index is reversed. Each loop iteration computes res->d[i] of the
198  // quotient and updates snum with the running remainder. Before each loop
199  // iteration, the div_n words beginning at snum->d[i+1] must be less than
200  // snum.
201  for (int i = loop - 1; i >= 0; i--) {
202    // The next word of the quotient, q, is floor(wnum / sdiv), where wnum is
203    // the div_n + 1 words beginning at snum->d[i]. i starts at
204    // num_n - div_n - 1, so there are at least div_n + 1 words available.
205    //
206    // Knuth step D3: Compute q', an estimate of q by looking at the top words
207    // of wnum and sdiv. We must estimate such that q' = q or q' = q + 1.
208    BN_ULONG q, rm = 0;
209    BN_ULONG *wnum = snum->d + i;
210    BN_ULONG n0 = wnum[div_n];
211    BN_ULONG n1 = wnum[div_n - 1];
212    if (n0 == d0) {
213      // Estimate q' = b - 1, where b is the base.
214      q = BN_MASK2;
215      // Knuth also runs the fixup routine in this case, but this would require
216      // computing rm and is unnecessary. q' is already close enough. That is,
217      // the true quotient, q is either b - 1 or b - 2.
218      //
219      // By the loop invariant, q <= b - 1, so we must show that q >= b - 2. We
220      // do this by showing wnum / sdiv >= b - 2. Suppose wnum / sdiv < b - 2.
221      // wnum and sdiv have the same most significant word, so:
222      //
223      //    wnum >= n0 * b^div_n
224      //    sdiv <  (n0 + 1) * b^(d_div - 1)
225      //
226      // Thus:
227      //
228      //    b - 2 > wnum / sdiv
229      //          > (n0 * b^div_n) / (n0 + 1) * b^(div_n - 1)
230      //          = (n0 * b) / (n0 + 1)
231      //
232      //         (n0 + 1) * (b - 2) > n0 * b
233      //    n0 * b + b - 2 * n0 - 2 > n0 * b
234      //                      b - 2 > 2 * n0
235      //                    b/2 - 1 > n0
236      //
237      // This contradicts the normalization condition, so q >= b - 2 and our
238      // estimate is close enough.
239    } else {
240      // Estimate q' = floor(n0n1 / d0). Per Theorem B, q' - 2 <= q <= q', which
241      // is slightly outside of our bounds.
242      assert(n0 < d0);
243      bn_div_rem_words(&q, &rm, n0, n1, d0);
244
245      // Fix the estimate by examining one more word and adjusting q' as needed.
246      // This is the second half of step D3 and is sufficient per exercises 19,
247      // 20, and 21. Although only one iteration is needed to correct q + 2 to
248      // q + 1, Knuth uses a loop. A loop will often also correct q + 1 to q,
249      // saving the slightly more expensive underflow handling below.
250      if (div_n > 1) {
251        BN_ULONG n2 = wnum[div_n - 2];
252#ifdef BN_ULLONG
253        BN_ULLONG t2 = (BN_ULLONG)d1 * q;
254        for (;;) {
255          if (t2 <= ((((BN_ULLONG)rm) << BN_BITS2) | n2)) {
256            break;
257          }
258          q--;
259          rm += d0;
260          if (rm < d0) {
261            // If rm overflows, the true value exceeds BN_ULONG and the next
262            // t2 comparison should exit the loop.
263            break;
264          }
265          t2 -= d1;
266        }
267#else   // !BN_ULLONG
268        BN_ULONG t2l, t2h;
269        BN_UMULT_LOHI(t2l, t2h, d1, q);
270        for (;;) {
271          if (t2h < rm || (t2h == rm && t2l <= n2)) {
272            break;
273          }
274          q--;
275          rm += d0;
276          if (rm < d0) {
277            // If rm overflows, the true value exceeds BN_ULONG and the next
278            // t2 comparison should exit the loop.
279            break;
280          }
281          if (t2l < d1) {
282            t2h--;
283          }
284          t2l -= d1;
285        }
286#endif  // !BN_ULLONG
287      }
288    }
289
290    // Knuth step D4 through D6: Now q' = q or q' = q + 1, and
291    // -sdiv < wnum - sdiv * q < sdiv. If q' = q + 1, the subtraction will
292    // underflow, and we fix it up below.
293    tmp->d[div_n] = bn_mul_words(tmp->d, sdiv->d, div_n, q);
294    if (bn_sub_words(wnum, wnum, tmp->d, div_n + 1)) {
295      q--;
296      // The final addition is expected to overflow, canceling the underflow.
297      wnum[div_n] += bn_add_words(wnum, wnum, sdiv->d, div_n);
298    }
299
300    // q is now correct, and wnum has been updated to the running remainder.
301    res->d[i] = q;
302  }
303
304  // Trim leading zeros and correct any negative zeros.
305  bn_set_minimal_width(snum);
306  bn_set_minimal_width(res);
307
308  // Knuth step D8: Unnormalize. snum now contains the remainder.
309  if (rem != NULL && !BN_rshift(rem, snum, norm_shift)) {
310    goto err;
311  }
312
313  BN_CTX_end(ctx);
314  return 1;
315
316err:
317  BN_CTX_end(ctx);
318  return 0;
319}
320
321int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx) {
322  if (!(BN_mod(r, m, d, ctx))) {
323    return 0;
324  }
325  if (!r->neg) {
326    return 1;
327  }
328
329  // now -d < r < 0, so we have to set r := r + d. Ignoring the sign bits, this
330  // is r = d - r.
331  return BN_usub(r, d, r);
332}
333
334BN_ULONG bn_reduce_once(BN_ULONG *r, const BN_ULONG *a, BN_ULONG carry,
335                        const BN_ULONG *m, size_t num) {
336  assert(r != a);
337  // |r| = |a| - |m|. |bn_sub_words| performs the bulk of the subtraction, and
338  // then we apply the borrow to |carry|.
339  carry -= bn_sub_words(r, a, m, num);
340  // We know 0 <= |a| < 2*|m|, so -|m| <= |r| < |m|.
341  //
342  // If 0 <= |r| < |m|, |r| fits in |num| words and |carry| is zero. We then
343  // wish to select |r| as the answer. Otherwise -m <= r < 0 and we wish to
344  // return |r| + |m|, or |a|. |carry| must then be -1 or all ones. In both
345  // cases, |carry| is a suitable input to |bn_select_words|.
346  //
347  // Although |carry| may be one if it was one on input and |bn_sub_words|
348  // returns zero, this would give |r| > |m|, violating our input assumptions.
349  declassify_assert(carry + 1 <= 1);
350  bn_select_words(r, carry, a /* r < 0 */, r /* r >= 0 */, num);
351  return carry;
352}
353
354BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m,
355                                 BN_ULONG *tmp, size_t num) {
356  // See |bn_reduce_once| for why this logic works.
357  carry -= bn_sub_words(tmp, r, m, num);
358  declassify_assert(carry + 1 <= 1);
359  bn_select_words(r, carry, r /* tmp < 0 */, tmp /* tmp >= 0 */, num);
360  return carry;
361}
362
363void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
364                      const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
365  // r = a - b
366  BN_ULONG borrow = bn_sub_words(r, a, b, num);
367  // tmp = a - b + m
368  bn_add_words(tmp, r, m, num);
369  bn_select_words(r, 0 - borrow, tmp /* r < 0 */, r /* r >= 0 */, num);
370}
371
372void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
373                      const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
374  BN_ULONG carry = bn_add_words(r, a, b, num);
375  bn_reduce_once_in_place(r, carry, m, tmp, num);
376}
377
378int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
379                     const BIGNUM *numerator, const BIGNUM *divisor,
380                     unsigned divisor_min_bits, BN_CTX *ctx) {
381  if (BN_is_negative(numerator) || BN_is_negative(divisor)) {
382    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
383    return 0;
384  }
385  if (BN_is_zero(divisor)) {
386    OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
387    return 0;
388  }
389
390  // This function implements long division in binary. It is not very efficient,
391  // but it is simple, easy to make constant-time, and performant enough for RSA
392  // key generation.
393
394  int ret = 0;
395  BN_CTX_start(ctx);
396  BIGNUM *q = quotient, *r = remainder;
397  if (quotient == NULL || quotient == numerator || quotient == divisor) {
398    q = BN_CTX_get(ctx);
399  }
400  if (remainder == NULL || remainder == numerator || remainder == divisor) {
401    r = BN_CTX_get(ctx);
402  }
403  BIGNUM *tmp = BN_CTX_get(ctx);
404  int initial_words;
405  if (q == NULL || r == NULL || tmp == NULL ||
406      !bn_wexpand(q, numerator->width) || !bn_wexpand(r, divisor->width) ||
407      !bn_wexpand(tmp, divisor->width)) {
408    goto err;
409  }
410
411  OPENSSL_memset(q->d, 0, numerator->width * sizeof(BN_ULONG));
412  q->width = numerator->width;
413  q->neg = 0;
414
415  OPENSSL_memset(r->d, 0, divisor->width * sizeof(BN_ULONG));
416  r->width = divisor->width;
417  r->neg = 0;
418
419  // Incorporate |numerator| into |r|, one bit at a time, reducing after each
420  // step. We maintain the invariant that |0 <= r < divisor| and
421  // |q * divisor + r = n| where |n| is the portion of |numerator| incorporated
422  // so far.
423  //
424  // First, we short-circuit the loop: if we know |divisor| has at least
425  // |divisor_min_bits| bits, the top |divisor_min_bits - 1| can be incorporated
426  // without reductions. This significantly speeds up |RSA_check_key|. For
427  // simplicity, we round down to a whole number of words.
428  declassify_assert(divisor_min_bits <= BN_num_bits(divisor));
429  initial_words = 0;
430  if (divisor_min_bits > 0) {
431    initial_words = (divisor_min_bits - 1) / BN_BITS2;
432    if (initial_words > numerator->width) {
433      initial_words = numerator->width;
434    }
435    OPENSSL_memcpy(r->d, numerator->d + numerator->width - initial_words,
436                   initial_words * sizeof(BN_ULONG));
437  }
438
439  for (int i = numerator->width - initial_words - 1; i >= 0; i--) {
440    for (int bit = BN_BITS2 - 1; bit >= 0; bit--) {
441      // Incorporate the next bit of the numerator, by computing
442      // r = 2*r or 2*r + 1. Note the result fits in one more word. We store the
443      // extra word in |carry|.
444      BN_ULONG carry = bn_add_words(r->d, r->d, r->d, divisor->width);
445      r->d[0] |= (numerator->d[i] >> bit) & 1;
446      // |r| was previously fully-reduced, so we know:
447      //      2*0 <= r <= 2*(divisor-1) + 1
448      //        0 <= r <= 2*divisor - 1 < 2*divisor.
449      // Thus |r| satisfies the preconditions for |bn_reduce_once_in_place|.
450      BN_ULONG subtracted = bn_reduce_once_in_place(r->d, carry, divisor->d,
451                                                    tmp->d, divisor->width);
452      // The corresponding bit of the quotient is set iff we needed to subtract.
453      q->d[i] |= (~subtracted & 1) << bit;
454    }
455  }
456
457  if ((quotient != NULL && !BN_copy(quotient, q)) ||
458      (remainder != NULL && !BN_copy(remainder, r))) {
459    goto err;
460  }
461
462  ret = 1;
463
464err:
465  BN_CTX_end(ctx);
466  return ret;
467}
468
469static BIGNUM *bn_scratch_space_from_ctx(size_t width, BN_CTX *ctx) {
470  BIGNUM *ret = BN_CTX_get(ctx);
471  if (ret == NULL || !bn_wexpand(ret, width)) {
472    return NULL;
473  }
474  ret->neg = 0;
475  ret->width = (int)width;
476  return ret;
477}
478
479// bn_resized_from_ctx returns |bn| with width at least |width| or NULL on
480// error. This is so it may be used with low-level "words" functions. If
481// necessary, it allocates a new |BIGNUM| with a lifetime of the current scope
482// in |ctx|, so the caller does not need to explicitly free it. |bn| must fit in
483// |width| words.
484static const BIGNUM *bn_resized_from_ctx(const BIGNUM *bn, size_t width,
485                                         BN_CTX *ctx) {
486  if ((size_t)bn->width >= width) {
487    // Any excess words must be zero.
488    assert(bn_fits_in_words(bn, width));
489    return bn;
490  }
491  BIGNUM *ret = bn_scratch_space_from_ctx(width, ctx);
492  if (ret == NULL || !BN_copy(ret, bn) || !bn_resize_words(ret, width)) {
493    return NULL;
494  }
495  return ret;
496}
497
498int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
499               BN_CTX *ctx) {
500  if (!BN_add(r, a, b)) {
501    return 0;
502  }
503  return BN_nnmod(r, r, m, ctx);
504}
505
506int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
507                     const BIGNUM *m) {
508  BN_CTX *ctx = BN_CTX_new();
509  int ok = ctx != NULL && bn_mod_add_consttime(r, a, b, m, ctx);
510  BN_CTX_free(ctx);
511  return ok;
512}
513
514int bn_mod_add_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
515                         const BIGNUM *m, BN_CTX *ctx) {
516  BN_CTX_start(ctx);
517  a = bn_resized_from_ctx(a, m->width, ctx);
518  b = bn_resized_from_ctx(b, m->width, ctx);
519  BIGNUM *tmp = bn_scratch_space_from_ctx(m->width, ctx);
520  int ok = a != NULL && b != NULL && tmp != NULL && bn_wexpand(r, m->width);
521  if (ok) {
522    bn_mod_add_words(r->d, a->d, b->d, m->d, tmp->d, m->width);
523    r->width = m->width;
524    r->neg = 0;
525  }
526  BN_CTX_end(ctx);
527  return ok;
528}
529
530int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
531               BN_CTX *ctx) {
532  if (!BN_sub(r, a, b)) {
533    return 0;
534  }
535  return BN_nnmod(r, r, m, ctx);
536}
537
538int bn_mod_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
539                         const BIGNUM *m, BN_CTX *ctx) {
540  BN_CTX_start(ctx);
541  a = bn_resized_from_ctx(a, m->width, ctx);
542  b = bn_resized_from_ctx(b, m->width, ctx);
543  BIGNUM *tmp = bn_scratch_space_from_ctx(m->width, ctx);
544  int ok = a != NULL && b != NULL && tmp != NULL && bn_wexpand(r, m->width);
545  if (ok) {
546    bn_mod_sub_words(r->d, a->d, b->d, m->d, tmp->d, m->width);
547    r->width = m->width;
548    r->neg = 0;
549  }
550  BN_CTX_end(ctx);
551  return ok;
552}
553
554int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
555                     const BIGNUM *m) {
556  BN_CTX *ctx = BN_CTX_new();
557  int ok = ctx != NULL && bn_mod_sub_consttime(r, a, b, m, ctx);
558  BN_CTX_free(ctx);
559  return ok;
560}
561
562int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
563               BN_CTX *ctx) {
564  BIGNUM *t;
565  int ret = 0;
566
567  BN_CTX_start(ctx);
568  t = BN_CTX_get(ctx);
569  if (t == NULL) {
570    goto err;
571  }
572
573  if (a == b) {
574    if (!BN_sqr(t, a, ctx)) {
575      goto err;
576    }
577  } else {
578    if (!BN_mul(t, a, b, ctx)) {
579      goto err;
580    }
581  }
582
583  if (!BN_nnmod(r, t, m, ctx)) {
584    goto err;
585  }
586
587  ret = 1;
588
589err:
590  BN_CTX_end(ctx);
591  return ret;
592}
593
594int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) {
595  if (!BN_sqr(r, a, ctx)) {
596    return 0;
597  }
598
599  // r->neg == 0,  thus we don't need BN_nnmod
600  return BN_mod(r, r, m, ctx);
601}
602
603int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
604                  BN_CTX *ctx) {
605  BIGNUM *abs_m = NULL;
606  int ret;
607
608  if (!BN_nnmod(r, a, m, ctx)) {
609    return 0;
610  }
611
612  if (m->neg) {
613    abs_m = BN_dup(m);
614    if (abs_m == NULL) {
615      return 0;
616    }
617    abs_m->neg = 0;
618  }
619
620  ret = bn_mod_lshift_consttime(r, r, n, (abs_m ? abs_m : m), ctx);
621
622  BN_free(abs_m);
623  return ret;
624}
625
626int bn_mod_lshift_consttime(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
627                            BN_CTX *ctx) {
628  if (!BN_copy(r, a) || !bn_resize_words(r, m->width)) {
629    return 0;
630  }
631
632  BN_CTX_start(ctx);
633  BIGNUM *tmp = bn_scratch_space_from_ctx(m->width, ctx);
634  int ok = tmp != NULL;
635  if (ok) {
636    for (int i = 0; i < n; i++) {
637      bn_mod_add_words(r->d, r->d, r->d, m->d, tmp->d, m->width);
638    }
639    r->neg = 0;
640  }
641  BN_CTX_end(ctx);
642  return ok;
643}
644
645int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m) {
646  BN_CTX *ctx = BN_CTX_new();
647  int ok = ctx != NULL && bn_mod_lshift_consttime(r, a, n, m, ctx);
648  BN_CTX_free(ctx);
649  return ok;
650}
651
652int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) {
653  if (!BN_lshift1(r, a)) {
654    return 0;
655  }
656
657  return BN_nnmod(r, r, m, ctx);
658}
659
660int bn_mod_lshift1_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *m,
661                             BN_CTX *ctx) {
662  return bn_mod_add_consttime(r, a, a, m, ctx);
663}
664
665int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m) {
666  BN_CTX *ctx = BN_CTX_new();
667  int ok = ctx != NULL && bn_mod_lshift1_consttime(r, a, m, ctx);
668  BN_CTX_free(ctx);
669  return ok;
670}
671
672BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) {
673  BN_ULONG ret = 0;
674  int i, j;
675
676  if (!w) {
677    // actually this an error (division by zero)
678    return (BN_ULONG)-1;
679  }
680
681  if (a->width == 0) {
682    return 0;
683  }
684
685  // normalize input for |bn_div_rem_words|.
686  j = BN_BITS2 - BN_num_bits_word(w);
687  w <<= j;
688  if (!BN_lshift(a, a, j)) {
689    return (BN_ULONG)-1;
690  }
691
692  for (i = a->width - 1; i >= 0; i--) {
693    BN_ULONG l = a->d[i];
694    BN_ULONG d;
695    BN_ULONG unused_rem;
696    bn_div_rem_words(&d, &unused_rem, ret, l, w);
697    ret = l - (d * w);
698    a->d[i] = d;
699  }
700
701  bn_set_minimal_width(a);
702  ret >>= j;
703  return ret;
704}
705
706BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) {
707#ifndef BN_CAN_DIVIDE_ULLONG
708  BN_ULONG ret = 0;
709#else
710  BN_ULLONG ret = 0;
711#endif
712  int i;
713
714  if (w == 0) {
715    return (BN_ULONG)-1;
716  }
717
718#ifndef BN_CAN_DIVIDE_ULLONG
719  // If |w| is too long and we don't have |BN_ULLONG| division then we need to
720  // fall back to using |BN_div_word|.
721  if (w > ((BN_ULONG)1 << BN_BITS4)) {
722    BIGNUM *tmp = BN_dup(a);
723    if (tmp == NULL) {
724      return (BN_ULONG)-1;
725    }
726    ret = BN_div_word(tmp, w);
727    BN_free(tmp);
728    return ret;
729  }
730#endif
731
732  for (i = a->width - 1; i >= 0; i--) {
733#ifndef BN_CAN_DIVIDE_ULLONG
734    ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) & BN_MASK2l)) % w;
735    ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w;
736#else
737    ret = (BN_ULLONG)(((ret << (BN_ULLONG)BN_BITS2) | a->d[i]) % (BN_ULLONG)w);
738#endif
739  }
740  return (BN_ULONG)ret;
741}
742
743int BN_mod_pow2(BIGNUM *r, const BIGNUM *a, size_t e) {
744  if (e == 0 || a->width == 0) {
745    BN_zero(r);
746    return 1;
747  }
748
749  size_t num_words = 1 + ((e - 1) / BN_BITS2);
750
751  // If |a| definitely has less than |e| bits, just BN_copy.
752  if ((size_t)a->width < num_words) {
753    return BN_copy(r, a) != NULL;
754  }
755
756  // Otherwise, first make sure we have enough space in |r|.
757  // Note that this will fail if num_words > INT_MAX.
758  if (!bn_wexpand(r, num_words)) {
759    return 0;
760  }
761
762  // Copy the content of |a| into |r|.
763  OPENSSL_memcpy(r->d, a->d, num_words * sizeof(BN_ULONG));
764
765  // If |e| isn't word-aligned, we have to mask off some of our bits.
766  size_t top_word_exponent = e % (sizeof(BN_ULONG) * 8);
767  if (top_word_exponent != 0) {
768    r->d[num_words - 1] &= (((BN_ULONG)1) << top_word_exponent) - 1;
769  }
770
771  // Fill in the remaining fields of |r|.
772  r->neg = a->neg;
773  r->width = (int)num_words;
774  bn_set_minimal_width(r);
775  return 1;
776}
777
778int BN_nnmod_pow2(BIGNUM *r, const BIGNUM *a, size_t e) {
779  if (!BN_mod_pow2(r, a, e)) {
780    return 0;
781  }
782
783  // If the returned value was non-negative, we're done.
784  if (BN_is_zero(r) || !r->neg) {
785    return 1;
786  }
787
788  size_t num_words = 1 + (e - 1) / BN_BITS2;
789
790  // Expand |r| to the size of our modulus.
791  if (!bn_wexpand(r, num_words)) {
792    return 0;
793  }
794
795  // Clear the upper words of |r|.
796  OPENSSL_memset(&r->d[r->width], 0, (num_words - r->width) * BN_BYTES);
797
798  // Set parameters of |r|.
799  r->neg = 0;
800  r->width = (int)num_words;
801
802  // Now, invert every word. The idea here is that we want to compute 2^e-|x|,
803  // which is actually equivalent to the twos-complement representation of |x|
804  // in |e| bits, which is -x = ~x + 1.
805  for (int i = 0; i < r->width; i++) {
806    r->d[i] = ~r->d[i];
807  }
808
809  // If our exponent doesn't span the top word, we have to mask the rest.
810  size_t top_word_exponent = e % BN_BITS2;
811  if (top_word_exponent != 0) {
812    r->d[r->width - 1] &= (((BN_ULONG)1) << top_word_exponent) - 1;
813  }
814
815  // Keep the minimal-width invariant for |BIGNUM|.
816  bn_set_minimal_width(r);
817
818  // Finally, add one, for the reason described above.
819  return BN_add(r, r, BN_value_one());
820}
821