// Adapted from https://github.com/Alexhuszagh/rust-lexical.

//! Building-blocks for arbitrary-precision math.
//!
//! These algorithms assume little-endian order for the large integer
//! buffers, so for a `vec![0, 1, 2, 3]`, `3` is the most significant limb,
//! and `0` is the least significant limb.

use super::large_powers;
use super::num::*;
use super::small_powers::*;
use alloc::vec::Vec;
use core::{cmp, iter, mem};

// ALIASES
// -------

//  Type for a single limb of the big integer.
//
//  A limb is analogous to a digit in base10, except that it stores 32-bit
//  or 64-bit numbers instead.
//
//  This should cover all known 64-bit platforms supported by Rust.
//      https://forge.rust-lang.org/platform-support.html
//
//  Platforms where native 128-bit multiplication is explicitly supported:
//      - x86_64 (Supported via `MUL`).
//      - mips64 (Supported via `DMULTU`, from which `HI` and `LO` can be read).
//
//  Platforms where native 64-bit multiplication is supported and
//  the hi-lo parts of the product can be extracted separately:
//      aarch64 (Requires `UMULH` and `MUL` to capture high and low bits).
//      powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits).
//
//  Platforms where native 128-bit multiplication is not supported,
//  requiring software emulation:
//      sparc64 (`UMUL` only supports double-word arguments).

// 32-BIT LIMB
#[cfg(fast_arithmetic = "32")]
pub type Limb = u32;

#[cfg(fast_arithmetic = "32")]
pub const POW5_LIMB: &[Limb] = &POW5_32;

#[cfg(fast_arithmetic = "32")]
pub const POW10_LIMB: &[Limb] = &POW10_32;

#[cfg(fast_arithmetic = "32")]
type Wide = u64;

// 64-BIT LIMB
#[cfg(fast_arithmetic = "64")]
pub type Limb = u64;

#[cfg(fast_arithmetic = "64")]
pub const POW5_LIMB: &[Limb] = &POW5_64;

#[cfg(fast_arithmetic = "64")]
pub const POW10_LIMB: &[Limb] = &POW10_64;

#[cfg(fast_arithmetic = "64")]
type Wide = u128;

/// Cast to limb type.
#[inline]
pub(crate) fn as_limb<T: Integer>(t: T) -> Limb {
    Limb::as_cast(t)
}

/// Cast to wide type.
#[inline]
fn as_wide<T: Integer>(t: T) -> Wide {
    Wide::as_cast(t)
}

// SPLIT
// -----

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(fast_arithmetic = "32")]
fn split_u64(x: u64) -> [Limb; 2] {
    [as_limb(x), as_limb(x >> 32)]
}

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(fast_arithmetic = "64")]
fn split_u64(x: u64) -> [Limb; 1] {
    [as_limb(x)]
}

// HI64
// ----

// NONZERO

/// Check if any of the remaining bits are non-zero.
#[inline]
pub fn nonzero<T: Integer>(x: &[T], rindex: usize) -> bool {
    let len = x.len();
    let slc = &x[..len - rindex];
    slc.iter().rev().any(|&x| x != T::ZERO)
}

/// Shift 64-bit integer to high 64-bits.
#[inline]
fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    (r0 << ls, false)
}

/// Shift 2 64-bit integers to high 64-bits.
#[inline]
fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    let rs = 64 - ls;
    let v = match ls {
        0 => r0,
        _ => (r0 << ls) | (r1 >> rs),
    };
    let n = r1 << ls != 0;
    (v, n)
}

/// Trait to export the high 64-bits from a little-endian slice.
trait Hi64<T>: AsRef<[T]> {
    /// Get the hi64 bits from a 1-limb slice.
    fn hi64_1(&self) -> (u64, bool);

    /// Get the hi64 bits from a 2-limb slice.
    fn hi64_2(&self) -> (u64, bool);

    /// Get the hi64 bits from a 3-limb slice.
    fn hi64_3(&self) -> (u64, bool);

    /// High-level exporter to extract the high 64 bits from a little-endian slice.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        match self.as_ref().len() {
            0 => (0, false),
            1 => self.hi64_1(),
            2 => self.hi64_2(),
            _ => self.hi64_3(),
        }
    }
}

impl Hi64<u32> for [u32] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0] as u64;
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() == 2);
        let r0 = (self[1] as u64) << 32;
        let r1 = self[0] as u64;
        u64_to_hi64_1(r0 | r1)
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 3);
        let r0 = self[self.len() - 1] as u64;
        let r1 = (self[self.len() - 2] as u64) << 32;
        let r2 = self[self.len() - 3] as u64;
        let (v, n) = u64_to_hi64_2(r0, r1 | r2);
        (v, n || nonzero(self, 3))
    }
}

impl Hi64<u64> for [u64] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0];
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 2);
        let r0 = self[self.len() - 1];
        let r1 = self[self.len() - 2];
        let (v, n) = u64_to_hi64_2(r0, r1);
        (v, n || nonzero(self, 2))
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        self.hi64_2()
    }
}
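
// Illustrative sketch, not from the original source: a quick check of the
// `hi64` contract under the 64-bit `Hi64` impl. Both impls exist on every
// platform, so asserting on explicit `u64` slices works regardless of the
// `fast_arithmetic` configuration.
#[cfg(test)]
#[test]
fn hi64_sketch() {
    // A single set bit is shifted up to the top of the high 64 bits.
    let x: &[u64] = &[1];
    assert_eq!(x.hi64(), (1u64 << 63, false));

    // Little-endian: the last limb is the most significant. The `true`
    // flag reports that lower-order bits were truncated.
    let y: &[u64] = &[1, u64::max_value()];
    assert_eq!(y.hi64(), (u64::max_value(), true));
}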

// SCALAR
// ------

// Scalar-to-scalar operations, as building-blocks for arbitrary-precision
// operations.

mod scalar {
    use super::*;

    // ADDITION

    /// Add two small integers and return the resulting value and whether overflow happened.
    #[inline]
    pub fn add(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_add(y)
    }

    /// AddAssign two small integers and return whether overflow happened.
    #[inline]
    pub fn iadd(x: &mut Limb, y: Limb) -> bool {
        let t = add(*x, y);
        *x = t.0;
        t.1
    }

    // SUBTRACTION

    /// Subtract two small integers and return the resulting value and whether overflow happened.
    #[inline]
    pub fn sub(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_sub(y)
    }

    /// SubAssign two small integers and return whether overflow happened.
    #[inline]
    pub fn isub(x: &mut Limb, y: Limb) -> bool {
        let t = sub(*x, y);
        *x = t.0;
        t.1
    }

    // MULTIPLICATION

    /// Multiply two small integers (with carry) and return the overflow contribution.
    ///
    /// Returns the (low, high) components.
    #[inline]
    pub fn mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
        // Cannot overflow, as long as wide is 2x as wide. This is because
        // the following is always true:
        // `Wide::max_value() - (Narrow::max_value() * Narrow::max_value()) >= Narrow::max_value()`
        let z: Wide = as_wide(x) * as_wide(y) + as_wide(carry);
        let bits = mem::size_of::<Limb>() * 8;
        (as_limb(z), as_limb(z >> bits))
    }

    /// MulAssign two small integers (with carry) and return the resulting carry.
    #[inline]
    pub fn imul(x: &mut Limb, y: Limb, carry: Limb) -> Limb {
        let t = mul(*x, y, carry);
        *x = t.0;
        t.1
    }
} // scalar
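
// Illustrative sketch, not from the original source: the widening multiply
// in `scalar::mul` cannot overflow even in the extreme case, because
// `max * max + max == max << Limb::BITS` fits exactly in `Wide`.
#[cfg(test)]
#[test]
fn scalar_mul_sketch() {
    let m = Limb::max_value();
    // The low limb is zero; the entire product lands in the high limb.
    assert_eq!(scalar::mul(m, m, m), (0, m));
    // A carry out of `iadd` is reported as `true`.
    let mut x: Limb = m;
    assert!(scalar::iadd(&mut x, 1));
    assert_eq!(x, 0);
}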

// SMALL
// -----

// Large-to-small operations, to modify a big integer from a native scalar.

mod small {
    use super::*;

    // ADDITION

    /// Implied AddAssign implementation for adding a small integer to bigint.
    ///
    /// Allows us to choose a start-index in x to store, to allow incrementing
    /// from a non-zero start.
    #[inline]
    pub fn iadd_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        if x.len() <= xstart {
            x.push(y);
        } else {
            // Initial add
            let mut carry = scalar::iadd(&mut x[xstart], y);

            // Increment until overflow stops occurring.
            let mut size = xstart + 1;
            while carry && size < x.len() {
                carry = scalar::iadd(&mut x[size], 1);
                size += 1;
            }

            // If we overflowed the buffer entirely, need to add 1 to the end
            // of the buffer.
            if carry {
                x.push(1);
            }
        }
    }

    /// AddAssign small integer to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: Limb) {
        iadd_impl(x, y, 0);
    }
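
    // Illustrative sketch, not from the original source: the carry from
    // `iadd` ripples through saturated limbs and grows the buffer.
    #[cfg(test)]
    #[test]
    fn small_iadd_sketch() {
        let mut x: Vec<Limb> = [Limb::max_value(), Limb::max_value()].to_vec();
        iadd(&mut x, 1);
        assert_eq!(x, [0, 0, 1]);
    }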

    // SUBTRACTION

    /// SubAssign small integer to bigint.
    /// Does not do overflowing subtraction.
    #[inline]
    pub fn isub_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        debug_assert!(x.len() > xstart && (x[xstart] >= y || x.len() > xstart + 1));

        // Initial subtraction
        let mut carry = scalar::isub(&mut x[xstart], y);

        // Decrement until the borrow stops propagating.
        let mut size = xstart + 1;
        while carry && size < x.len() {
            carry = scalar::isub(&mut x[size], 1);
            size += 1;
        }
        normalize(x);
    }

    // MULTIPLICATION

    /// MulAssign small integer to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: Limb) {
        // Multiply iteratively over all elements, adding the carry each time.
        let mut carry: Limb = 0;
        for xi in &mut *x {
            carry = scalar::imul(xi, y, carry);
        }

        // Overflow of value, add to end.
        if carry != 0 {
            x.push(carry);
        }
    }

    /// Mul small integer to bigint.
    #[inline]
    pub fn mul(x: &[Limb], y: Limb) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        imul(&mut z, y);
        z
    }

    /// MulAssign by a power.
    ///
    /// Theoretically...
    ///
    /// Use an exponentiation by squaring method, since it reduces the time
    /// complexity of the multiplication to ~`O(log(n))` for the squaring,
    /// and `O(n*m)` for the result. Since `m` is typically a lower-order
    /// factor, this significantly reduces the number of multiplications
    /// we need to do. Iteratively multiplying by small powers follows
    /// the nth triangular number series, which scales as `O(p^2)`, but
    /// where `p` is `n+m`. In short, it scales very poorly.
    ///
    /// Practically...
    ///
    /// Exponentiation by Squaring:
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench:       1,018 ns/iter (+/- 78)
    ///     test bigcomp_f64_lexical ... bench:       3,639 ns/iter (+/- 1,007)
    ///
    /// Exponentiation by Iterative Small Powers:
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench:         518 ns/iter (+/- 31)
    ///     test bigcomp_f64_lexical ... bench:         583 ns/iter (+/- 47)
    ///
    /// Exponentiation by Iterative Large Powers (of 2):
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench:         671 ns/iter (+/- 31)
    ///     test bigcomp_f64_lexical ... bench:       1,394 ns/iter (+/- 47)
    ///
    /// Even using worst-case scenarios, exponentiation by squaring is
    /// significantly slower for our workloads. Just multiply by small powers,
    /// in simple cases, and use precalculated large powers in other cases.
    pub fn imul_pow5(x: &mut Vec<Limb>, n: u32) {
        use super::large::KARATSUBA_CUTOFF;

        let small_powers = POW5_LIMB;
        let large_powers = large_powers::POW5;

        if n == 0 {
            // No exponent, just return.
            // The 0-index of the large powers is `2^0`, which is 1, so we want
            // to make sure we don't take that path with a literal 0.
            return;
        }

        // We want to use the asymptotically faster large-power algorithm if
        // we're going to be using Karatsuba multiplication at some point
        // during the computation; otherwise, just use iterative small powers.
        let bit_length = 32 - n.leading_zeros() as usize;
        debug_assert!(bit_length != 0 && bit_length <= large_powers.len());
        if x.len() + large_powers[bit_length - 1].len() < 2 * KARATSUBA_CUTOFF {
            // We can use iterative small powers to make this faster for the
            // easy cases.

            // Multiply by the largest small power until n < step.
            let step = small_powers.len() - 1;
            let power = small_powers[step];
            let mut n = n as usize;
            while n >= step {
                imul(x, power);
                n -= step;
            }

            // Multiply by the remainder.
            imul(x, small_powers[n]);
        } else {
            // In theory, this code should be asymptotically a lot faster; in
            // practice, our small::imul seems to be the limiting step, and
            // large imul is slow as well.

            // Multiply by higher order powers.
            let mut idx: usize = 0;
            let mut bit: usize = 1;
            let mut n = n as usize;
            while n != 0 {
                if n & bit != 0 {
                    debug_assert!(idx < large_powers.len());
                    large::imul(x, large_powers[idx]);
                    n ^= bit;
                }
                idx += 1;
                bit <<= 1;
            }
        }
    }
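
    // Illustrative sketch, not from the original source: multiplying by a
    // power of five via `imul_pow5`. With inputs this small it stays on the
    // iterative small-power path, but the result is the same either way.
    #[cfg(test)]
    #[test]
    fn imul_pow5_sketch() {
        let mut x: Vec<Limb> = [1].to_vec();
        imul_pow5(&mut x, 3);
        assert_eq!(x, [125]); // 1 * 5^3
    }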

    // BIT LENGTH

    /// Get number of leading zero bits in the storage.
    #[inline]
    pub fn leading_zeros(x: &[Limb]) -> usize {
        x.last().map_or(0, |x| x.leading_zeros() as usize)
    }

    /// Calculate the bit-length of the big-integer.
    #[inline]
    pub fn bit_length(x: &[Limb]) -> usize {
        let bits = mem::size_of::<Limb>() * 8;
        // Avoid overflowing, calculate via total number of bits
        // minus leading zero bits.
        let nlz = leading_zeros(x);
        bits.checked_mul(x.len())
            .map_or_else(usize::max_value, |v| v - nlz)
    }
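
    // Illustrative sketch, not from the original source: `bit_length` counts
    // whole limbs below the top limb, plus the used bits of the top limb.
    #[cfg(test)]
    #[test]
    fn bit_length_sketch() {
        let bits = mem::size_of::<Limb>() * 8;
        assert_eq!(bit_length(&[1]), 1);
        assert_eq!(bit_length(&[0, 1]), bits + 1);
    }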

    // SHL

    /// Shift-left bits inside a buffer.
    ///
    /// Assumes `n < Limb::BITS`, i.e., internally shifting bits.
    #[inline]
    pub fn ishl_bits(x: &mut Vec<Limb>, n: usize) {
        // Need to shift by the number of `bits % Limb::BITS`.
        let bits = mem::size_of::<Limb>() * 8;
        debug_assert!(n < bits);
        if n == 0 {
            return;
        }

        // Internally, for each item, we shift left by n, and add the previous
        // right shifted limb-bits.
        // For example, we transform (for u8) shifted left 2, to:
        //      b10100100 b01000010
        //      b10 b10010001 b00001000
        let rshift = bits - n;
        let lshift = n;
        let mut prev: Limb = 0;
        for xi in &mut *x {
            let tmp = *xi;
            *xi <<= lshift;
            *xi |= prev >> rshift;
            prev = tmp;
        }

        // Always push the carry, even if it creates a non-normal result.
        let carry = prev >> rshift;
        if carry != 0 {
            x.push(carry);
        }
    }

    /// Shift-left `n` limbs inside a buffer.
    ///
    /// Assumes `n` is not 0.
    #[inline]
    pub fn ishl_limbs(x: &mut Vec<Limb>, n: usize) {
        debug_assert!(n != 0);
        if !x.is_empty() {
            x.reserve(n);
            x.splice(..0, iter::repeat(0).take(n));
        }
    }

    /// Shift-left buffer by n bits.
    #[inline]
    pub fn ishl(x: &mut Vec<Limb>, n: usize) {
        let bits = mem::size_of::<Limb>() * 8;
        // Need to pad with `n / Limb::BITS` zero limbs,
        // and shift-left with carry by `n % Limb::BITS` bits.
        let rem = n % bits;
        let div = n / bits;
        ishl_bits(x, rem);
        if div != 0 {
            ishl_limbs(x, div);
        }
    }
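
    // Illustrative sketch, not from the original source: shifting left by
    // one full limb plus one bit pads one zero limb and doubles the value.
    #[cfg(test)]
    #[test]
    fn ishl_sketch() {
        let bits = mem::size_of::<Limb>() * 8;
        let mut x: Vec<Limb> = [1].to_vec();
        ishl(&mut x, bits + 1);
        assert_eq!(x, [0, 2]);
    }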

    // NORMALIZE

    /// Normalize the container by popping any leading zeros.
    #[inline]
    pub fn normalize(x: &mut Vec<Limb>) {
        // Remove leading zero limbs (at the most-significant end of the
        // little-endian buffer) so the representation stays canonical.
        while x.last() == Some(&0) {
            x.pop();
        }
    }
} // small

// LARGE
// -----

// Large-to-large operations, to modify a big integer from another big integer.

mod large {
    use super::*;

    // RELATIVE OPERATORS

    /// Compare `x` to `y`, in little-endian order.
    #[inline]
    pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
        if x.len() > y.len() {
            cmp::Ordering::Greater
        } else if x.len() < y.len() {
            cmp::Ordering::Less
        } else {
            let iter = x.iter().rev().zip(y.iter().rev());
            for (&xi, &yi) in iter {
                if xi > yi {
                    return cmp::Ordering::Greater;
                } else if xi < yi {
                    return cmp::Ordering::Less;
                }
            }
            // Equal case.
            cmp::Ordering::Equal
        }
    }

    /// Check if x is less than y.
    #[inline]
    pub fn less(x: &[Limb], y: &[Limb]) -> bool {
        compare(x, y) == cmp::Ordering::Less
    }

    /// Check if x is greater than or equal to y.
    #[inline]
    pub fn greater_equal(x: &[Limb], y: &[Limb]) -> bool {
        !less(x, y)
    }
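
    // Illustrative sketch, not from the original source: comparisons assume
    // normalized, little-endian buffers, so a longer buffer is always larger.
    #[cfg(test)]
    #[test]
    fn compare_sketch() {
        assert_eq!(compare(&[0, 1], &[9]), cmp::Ordering::Greater);
        assert!(less(&[2, 1], &[3, 1]));
    }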

    // ADDITION

    /// Implied AddAssign implementation for bigints.
    ///
    /// Allows us to choose a start-index in x to store, so we can avoid
    /// padding the buffer with zeros when not needed, optimized for vectors.
    pub fn iadd_impl(x: &mut Vec<Limb>, y: &[Limb], xstart: usize) {
        // The effective x buffer is from `xstart..x.len()`, so we need to treat
        // that as the current range. If the effective y buffer is longer, need
        // to resize to that, + the start index.
        if y.len() > x.len() - xstart {
            x.resize(y.len() + xstart, 0);
        }

        // Iteratively add elements from y to x.
        let mut carry = false;
        for (xi, yi) in x[xstart..].iter_mut().zip(y.iter()) {
            // Only one op of the two can overflow, since we added at max
            // Limb::max_value() + Limb::max_value(). Add the previous carry,
            // and store the current carry for the next.
            let mut tmp = scalar::iadd(xi, *yi);
            if carry {
                tmp |= scalar::iadd(xi, 1);
            }
            carry = tmp;
        }

        // Overflow from the previous bit.
        if carry {
            small::iadd_impl(x, 1, y.len() + xstart);
        }
    }

    /// AddAssign bigint to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: &[Limb]) {
        iadd_impl(x, y, 0);
    }

    /// Add bigint to bigint.
    #[inline]
    pub fn add(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        iadd(&mut z, y);
        z
    }
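
    // Illustrative sketch, not from the original source: an overflowing add
    // grows the result by one limb.
    #[cfg(test)]
    #[test]
    fn large_add_sketch() {
        assert_eq!(add(&[Limb::max_value()], &[1]), [0, 1]);
    }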

    // SUBTRACTION

    /// SubAssign bigint to bigint.
    pub fn isub(x: &mut Vec<Limb>, y: &[Limb]) {
        // Basic underflow checks.
        debug_assert!(greater_equal(x, y));

        // Iteratively subtract elements of y from x.
        let mut carry = false;
        for (xi, yi) in x.iter_mut().zip(y.iter()) {
            // Only one op of the two can underflow, since we subtract at most
            // Limb::max_value() plus a borrow of 1. Apply the previous borrow,
            // and store the current borrow for the next.
            let mut tmp = scalar::isub(xi, *yi);
            if carry {
                tmp |= scalar::isub(xi, 1);
            }
            carry = tmp;
        }

        if carry {
            small::isub_impl(x, 1, y.len());
        } else {
            small::normalize(x);
        }
    }
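
    // Illustrative sketch, not from the original source: borrowing across a
    // limb boundary, then normalizing away the now-zero high limb.
    #[cfg(test)]
    #[test]
    fn large_isub_sketch() {
        let mut x: Vec<Limb> = [0, 1].to_vec();
        isub(&mut x, &[1]);
        assert_eq!(x, [Limb::max_value()]);
    }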

    // MULTIPLICATION

    /// Number of limbs at which we bottom out to the asymptotically slower algorithm.
    ///
    /// Karatsuba tends to out-perform long-multiplication at ~320-640 bits,
    /// so we go halfway, while Newton division tends to out-perform
    /// Algorithm D at ~1024 bits. We can toggle this for optimal performance.
    pub const KARATSUBA_CUTOFF: usize = 32;

    /// Grade-school multiplication algorithm.
    ///
    /// Slow, naive algorithm, using limb-bit bases and just shifting left for
    /// each iteration. This could be optimized with numerous other algorithms,
    /// but it's extremely simple, and works in `O(n*m)` time, which is fine
    /// by me. Each of the `m` iterations requires `n` multiplications and `n`
    /// additions: grade-school multiplication.
    fn long_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        // Using the immutable value, multiply by all the scalars in y, using
        // the algorithm defined above. Use a single buffer to avoid
        // frequent reallocations. Handle the first case to avoid a redundant
        // addition, since we know y.len() >= 1.
        let mut z: Vec<Limb> = small::mul(x, y[0]);
        z.resize(x.len() + y.len(), 0);

        // Handle the iterative cases.
        for (i, &yi) in y[1..].iter().enumerate() {
            let zi: Vec<Limb> = small::mul(x, yi);
            iadd_impl(&mut z, &zi, i + 1);
        }

        small::normalize(&mut z);

        z
    }

    /// Split a buffer at index `m` into (lo, hi) halves.
    #[inline]
    pub fn karatsuba_split(z: &[Limb], m: usize) -> (&[Limb], &[Limb]) {
        (&z[..m], &z[m..])
    }

    /// Karatsuba multiplication algorithm with roughly equal input sizes.
    ///
    /// Assumes `y.len() >= x.len()`.
    fn karatsuba_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if y.len() <= KARATSUBA_CUTOFF {
            // Bottom out to long multiplication for small cases.
            long_mul(x, y)
        } else if x.len() < y.len() / 2 {
            karatsuba_uneven_mul(x, y)
        } else {
            // Do our 3 multiplications.
            let m = y.len() / 2;
            let (xl, xh) = karatsuba_split(x, m);
            let (yl, yh) = karatsuba_split(y, m);
            let sumx = add(xl, xh);
            let sumy = add(yl, yh);
            let z0 = karatsuba_mul(xl, yl);
            let mut z1 = karatsuba_mul(&sumx, &sumy);
            let z2 = karatsuba_mul(xh, yh);
            // Properly scale z1, which is `z1 - z2 - z0`.
            isub(&mut z1, &z2);
            isub(&mut z1, &z0);

            // Create our result, which is equal to, in little-endian order:
            // [z0, z1 - z2 - z0, z2]
            //  z1 must be shifted m limbs (by `m * Limb::BITS` bits) over.
            //  z2 must be shifted 2*m limbs (by `2 * m * Limb::BITS` bits) over.
            let len = z0.len().max(m + z1.len()).max(2 * m + z2.len());
            let mut result = z0;
            result.reserve_exact(len - result.len());
            iadd_impl(&mut result, &z1, m);
            iadd_impl(&mut result, &z2, 2 * m);

            result
        }
    }
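
    // Illustrative sketch, not from the original source: for inputs longer
    // than KARATSUBA_CUTOFF, the split-and-recombine result must agree with
    // grade-school multiplication.
    #[cfg(test)]
    #[test]
    fn karatsuba_matches_long_mul_sketch() {
        let x: Vec<Limb> = (1..=40).collect();
        let y: Vec<Limb> = (2..=41).collect();
        assert_eq!(karatsuba_mul(&x, &y), long_mul(&x, &y));
    }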

    /// Karatsuba multiplication algorithm where y is substantially larger than x.
    ///
    /// Assumes `y.len() >= x.len()`.
    fn karatsuba_uneven_mul(x: &[Limb], mut y: &[Limb]) -> Vec<Limb> {
        let mut result = Vec::<Limb>::default();
        result.resize(x.len() + y.len(), 0);

        // This effectively is like grade-school multiplication between
        // two numbers, except we're using splits on `y`, and the intermediate
        // step is a Karatsuba multiplication.
        let mut start = 0;
        while !y.is_empty() {
            let m = x.len().min(y.len());
            let (yl, yh) = karatsuba_split(y, m);
            let prod = karatsuba_mul(x, yl);
            iadd_impl(&mut result, &prod, start);
            y = yh;
            start += m;
        }
        small::normalize(&mut result);

        result
    }

    /// Forwarder to the proper Karatsuba algorithm.
    #[inline]
    fn karatsuba_mul_fwd(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if x.len() < y.len() {
            karatsuba_mul(x, y)
        } else {
            karatsuba_mul(y, x)
        }
    }

    /// MulAssign bigint to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: &[Limb]) {
        if y.len() == 1 {
            small::imul(x, y[0]);
        } else {
            // Forward to the Karatsuba implementation so the shorter of the
            // two buffers is always the first operand.
            *x = karatsuba_mul_fwd(x, y);
        }
    }
} // large

// TRAITS
// ------

/// Trait for shared operations on big integers.
///
/// None of these are implemented using normal traits, since these
/// are very expensive operations, and we want to deliberately
/// and explicitly use these functions.
pub(crate) trait Math: Clone + Sized + Default {
    // DATA

    /// Get access to the underlying data.
    fn data(&self) -> &Vec<Limb>;

    /// Get mutable access to the underlying data.
    fn data_mut(&mut self) -> &mut Vec<Limb>;

    // RELATIVE OPERATIONS

    /// Compare self to y.
    #[inline]
    fn compare(&self, y: &Self) -> cmp::Ordering {
        large::compare(self.data(), y.data())
    }

    // PROPERTIES

    /// Get the high 64-bits from the bigint and if there are remaining bits.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        self.data().as_slice().hi64()
    }

    /// Calculate the bit-length of the big-integer.
    /// Returns usize::max_value() if the value overflows,
    /// i.e., if `self.data().len() > usize::max_value() / 8`.
    #[inline]
    fn bit_length(&self) -> usize {
        small::bit_length(self.data())
    }

    // INTEGER CONVERSIONS

    /// Create new big integer from u64.
    #[inline]
    fn from_u64(x: u64) -> Self {
        let mut v = Self::default();
        let slc = split_u64(x);
        v.data_mut().extend_from_slice(&slc);
        v.normalize();
        v
    }

    // NORMALIZE

    /// Normalize the integer, so any leading zero values are removed.
    #[inline]
    fn normalize(&mut self) {
        small::normalize(self.data_mut());
    }

    // ADDITION

    /// AddAssign small integer.
    #[inline]
    fn iadd_small(&mut self, y: Limb) {
        small::iadd(self.data_mut(), y);
    }

    // MULTIPLICATION

    /// MulAssign small integer.
    #[inline]
    fn imul_small(&mut self, y: Limb) {
        small::imul(self.data_mut(), y);
    }

    /// Multiply by a power of 2.
    #[inline]
    fn imul_pow2(&mut self, n: u32) {
        self.ishl(n as usize);
    }

    /// Multiply by a power of 5.
    #[inline]
    fn imul_pow5(&mut self, n: u32) {
        small::imul_pow5(self.data_mut(), n);
    }

    /// MulAssign by a power of 10.
    #[inline]
    fn imul_pow10(&mut self, n: u32) {
        self.imul_pow5(n);
        self.imul_pow2(n);
    }

    // SHIFTS

    /// Shift-left the entire buffer n bits.
    #[inline]
    fn ishl(&mut self, n: usize) {
        small::ishl(self.data_mut(), n);
    }
}
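
// Illustrative sketch, not part of the original source: a minimal type
// implementing `Math`, to show how the shared operations compose. The name
// `Bigit` is made up for this example.
#[cfg(test)]
#[derive(Clone, Default)]
struct Bigit {
    data: Vec<Limb>,
}

#[cfg(test)]
impl Math for Bigit {
    fn data(&self) -> &Vec<Limb> {
        &self.data
    }

    fn data_mut(&mut self) -> &mut Vec<Limb> {
        &mut self.data
    }
}

#[cfg(test)]
#[test]
fn math_trait_sketch() {
    // 3 * 10^2 == 300; `imul_pow10` is a power of 5 followed by a left shift.
    let mut x = Bigit::from_u64(3);
    x.imul_pow10(2);
    assert_eq!(x.hi64(), Bigit::from_u64(300).hi64());
    assert_eq!(x.bit_length(), 9); // 300 fits in 9 bits.
}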