• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Burnikel-Ziegler division.
6 // Reference: "Fast Recursive Division" by Christoph Burnikel and Joachim
7 // Ziegler, found at http://cr.yp.to/bib/1998/burnikel.ps
8 
9 #include <string.h>
10 
11 #include "src/bigint/bigint-internal.h"
12 #include "src/bigint/digit-arithmetic.h"
13 #include "src/bigint/div-helpers.h"
14 #include "src/bigint/util.h"
15 #include "src/bigint/vector-arithmetic.h"
16 
17 namespace v8 {
18 namespace bigint {
19 
20 namespace {
21 
22 // Compares [a_high, A] with B.
23 // Returns:
24 // - a value < 0 if [a_high, A] < B
25 // - 0           if [a_high, A] == B
26 // - a value > 0 if [a_high, A] > B.
SpecialCompare(digit_t a_high,Digits A,Digits B)27 int SpecialCompare(digit_t a_high, Digits A, Digits B) {
28   B.Normalize();
29   int a_len;
30   if (a_high == 0) {
31     A.Normalize();
32     a_len = A.len();
33   } else {
34     a_len = A.len() + 1;
35   }
36   int diff = a_len - B.len();
37   if (diff != 0) return diff;
38   int i = a_len - 1;
39   if (a_high != 0) {
40     if (a_high > B[i]) return 1;
41     if (a_high < B[i]) return -1;
42     i--;
43   }
44   while (i >= 0 && A[i] == B[i]) i--;
45   if (i < 0) return 0;
46   return A[i] > B[i] ? 1 : -1;
47 }
48 
SetOnes(RWDigits X)49 void SetOnes(RWDigits X) {
50   memset(X.digits(), 0xFF, X.len() * sizeof(digit_t));
51 }
52 
53 // Since the Burnikel-Ziegler method is inherently recursive, we put
54 // non-changing data into a container object.
55 class BZ {
56  public:
BZ(ProcessorImpl * proc,int scratch_space)57   BZ(ProcessorImpl* proc, int scratch_space)
58       : proc_(proc),
59         scratch_mem_(scratch_space >= kBurnikelThreshold ? scratch_space : 0) {}
60 
61   void DivideBasecase(RWDigits Q, RWDigits R, Digits A, Digits B);
62   void D3n2n(RWDigits Q, RWDigits R, Digits A1A2, Digits A3, Digits B);
63   void D2n1n(RWDigits Q, RWDigits R, Digits A, Digits B);
64 
65  private:
66   ProcessorImpl* proc_;
67   Storage scratch_mem_;
68 };
69 
DivideBasecase(RWDigits Q,RWDigits R,Digits A,Digits B)70 void BZ::DivideBasecase(RWDigits Q, RWDigits R, Digits A, Digits B) {
71   A.Normalize();
72   B.Normalize();
73   DCHECK(B.len() > 0);
74   int cmp = Compare(A, B);
75   if (cmp <= 0) {
76     Q.Clear();
77     if (cmp == 0) {
78       // If A == B, then Q=1, R=0.
79       R.Clear();
80       Q[0] = 1;
81     } else {
82       // If A < B, then Q=0, R=A.
83       PutAt(R, A, R.len());
84     }
85     return;
86   }
87   if (B.len() == 1) {
88     return proc_->DivideSingle(Q, R.digits(), A, B[0]);
89   }
90   return proc_->DivideSchoolbook(Q, R, A, B);
91 }
92 
93 // Algorithm 2 from the paper. Variable names same as there.
94 // Returns Q(uotient) and R(emainder) for A/B, with B having two thirds
95 // the size of A = [A1, A2, A3].
D3n2n(RWDigits Q,RWDigits R,Digits A1A2,Digits A3,Digits B)96 void BZ::D3n2n(RWDigits Q, RWDigits R, Digits A1A2, Digits A3, Digits B) {
97   DCHECK((B.len() & 1) == 0);
98   int n = B.len() / 2;
99   DCHECK(A1A2.len() == 2 * n);
100   // Actual condition is stricter than length: A < B * 2^(kDigitBits * n)
101   DCHECK(Compare(A1A2, B) < 0);
102   DCHECK(A3.len() == n);
103   DCHECK(Q.len() == n);
104   DCHECK(R.len() == 2 * n);
105   // 1. Split A into three parts A = [A1, A2, A3] with Ai < 2^(kDigitBits * n).
106   Digits A1(A1A2, n, n);
107   // 2. Split B into two parts B = [B1, B2] with Bi < 2^(kDigitBits * n).
108   Digits B1(B, n, n);
109   Digits B2(B, 0, n);
110   // 3. Distinguish the cases A1 < B1 or A1 >= B1.
111   RWDigits Qhat = Q;
112   RWDigits R1(R, n, n);
113   digit_t r1_high = 0;
114   if (Compare(A1, B1) < 0) {
115     // 3a. If A1 < B1, compute Qhat = floor([A1, A2] / B1) with remainder R1
116     //     using algorithm D2n1n.
117     D2n1n(Qhat, R1, A1A2, B1);
118     if (proc_->should_terminate()) return;
119   } else {
120     // 3b. If A1 >= B1, set Qhat = 2^(kDigitBits * n) - 1 and set
121     //     R1 = [A1, A2] - [B1, 0] + [0, B1]
122     SetOnes(Qhat);
123     // Step 1: compute A1 - B1, which can't underflow because of the comparison
124     // guarding this else-branch, and always has a one-digit result because
125     // of this function's preconditions.
126     RWDigits temp = R1;
127     Subtract(temp, A1, B1);
128     temp.Normalize();
129     DCHECK(temp.len() <= 1);
130     if (temp.len() > 0) r1_high = temp[0];
131     // Step 2: compute A2 + B1.
132     Digits A2(A1A2, 0, n);
133     r1_high += AddAndReturnCarry(R1, A2, B1);
134   }
135   // 4. Compute D = Qhat * B2 using (Karatsuba) multiplication.
136   RWDigits D(scratch_mem_.get(), 2 * n);
137   proc_->Multiply(D, Qhat, B2);
138   if (proc_->should_terminate()) return;
139 
140   // 5. Compute Rhat = R1*2^(kDigitBits * n) + A3 - D = [R1, A3] - D.
141   PutAt(R, A3, n);
142   // 6. As long as Rhat < 0, repeat:
143   while (SpecialCompare(r1_high, R, D) < 0) {
144     // 6a. Rhat = Rhat + B
145     r1_high += AddAndReturnCarry(R, R, B);
146     // 6b. Qhat = Qhat - 1
147     Subtract(Qhat, 1);
148   }
149   // 5. Compute Rhat = R1*2^(kDigitBits * n) + A3 - D = [R1, A3] - D.
150   digit_t borrow = SubtractAndReturnBorrow(R, R, D);
151   DCHECK(borrow == r1_high);
152   DCHECK(Compare(R, B) < 0);
153   (void)borrow;
154   // 7. Return R = Rhat, Q = Qhat.
155 }
156 
157 // Algorithm 1 from the paper. Variable names same as there.
158 // Returns Q(uotient) and (R)emainder for A/B, with A twice the size of B.
D2n1n(RWDigits Q,RWDigits R,Digits A,Digits B)159 void BZ::D2n1n(RWDigits Q, RWDigits R, Digits A, Digits B) {
160   int n = B.len();
161   DCHECK(A.len() <= 2 * n);
162   // A < B * 2^(kDigitsBits * n)
163   DCHECK(Compare(Digits(A, n, n), B) < 0);
164   DCHECK(Q.len() == n);
165   DCHECK(R.len() == n);
166   // 1. If n is odd or smaller than some convenient constant, compute Q and R
167   //    by school division and return.
168   if ((n & 1) == 1 || n < kBurnikelThreshold) {
169     return DivideBasecase(Q, R, A, B);
170   }
171   // 2. Split A into four parts A = [A1, ..., A4] with
172   //    Ai < 2^(kDigitBits * n / 2). Split B into two parts [B2, B1] with
173   //    Bi < 2^(kDigitBits * n / 2).
174   Digits A1A2(A, n, n);
175   Digits A3(A, n / 2, n / 2);
176   Digits A4(A, 0, n / 2);
177   // 3. Compute the high part Q1 of floor(A/B) as
178   //    Q1 = floor([A1, A2, A3] / [B1, B2]) with remainder R1 = [R11, R12],
179   //    using algorithm D3n2n.
180   RWDigits Q1(Q, n / 2, n / 2);
181   ScratchDigits R1(n);
182   D3n2n(Q1, R1, A1A2, A3, B);
183   if (proc_->should_terminate()) return;
184   // 4. Compute the low part Q2 of floor(A/B) as
185   //    Q2 = floor([R11, R12, A4] / [B1, B2]) with remainder R, using
186   //    algorithm D3n2n.
187   RWDigits Q2(Q, 0, n / 2);
188   D3n2n(Q2, R, R1, A4, B);
189   // 5. Return Q = [Q1, Q2] and R.
190 }
191 
192 }  // namespace
193 
194 // Algorithm 3 from the paper. Variables names same as there.
195 // Returns Q(uotient) and R(emainder) for A/B (no size restrictions).
196 // R is optional, Q is not.
DivideBurnikelZiegler(RWDigits Q,RWDigits R,Digits A,Digits B)197 void ProcessorImpl::DivideBurnikelZiegler(RWDigits Q, RWDigits R, Digits A,
198                                           Digits B) {
199   DCHECK(A.len() >= B.len());
200   DCHECK(R.len() == 0 || R.len() >= B.len());
201   DCHECK(Q.len() > A.len() - B.len());
202   int r = A.len();
203   int s = B.len();
204   // The requirements are:
205   // - n >= s, n as small as possible.
206   // - m must be a power of two.
207   // 1. Set m = min {2^k | 2^k * kBurnikelThreshold > s}.
208   int m = 1 << BitLength(s / kBurnikelThreshold);
209   // 2. Set j = roundup(s/m) and n = j * m.
210   int j = DIV_CEIL(s, m);
211   int n = j * m;
212   // 3. Set sigma = max{tao | 2^tao * B < 2^(kDigitBits * n)}.
213   int sigma = CountLeadingZeros(B[s - 1]);
214   int digit_shift = n - s;
215   // 4. Set B = B * 2^sigma to normalize B. Shift A by the same amount.
216   ScratchDigits B_shifted(n);
217   LeftShift(B_shifted + digit_shift, B, sigma);
218   for (int i = 0; i < digit_shift; i++) B_shifted[i] = 0;
219   B = B_shifted;
220   // We need an extra digit if A's top digit does not have enough space for
221   // the left-shift by {sigma}. Additionally, the top bit of A must be 0
222   // (see "-1" in step 5 below), which combined with B being normalized (i.e.
223   // B's top bit is 1) ensures the preconditions of the helper functions.
224   int extra_digit = CountLeadingZeros(A[r - 1]) < (sigma + 1) ? 1 : 0;
225   r = A.len() + digit_shift + extra_digit;
226   ScratchDigits A_shifted(r);
227   LeftShift(A_shifted + digit_shift, A, sigma);
228   for (int i = 0; i < digit_shift; i++) A_shifted[i] = 0;
229   A = A_shifted;
230   // 5. Set t = min{t >= 2 | A < 2^(kDigitBits * t * n - 1)}.
231   int t = std::max(DIV_CEIL(r, n), 2);
232   // 6. Split A conceptually into t blocks.
233   // 7. Set Z_(t-2) = [A_(t-1), A_(t-2)].
234   int z_len = n * 2;
235   ScratchDigits Z(z_len);
236   PutAt(Z, A + n * (t - 2), z_len);
237   // 8. For i from t-2 downto 0 do:
238   BZ bz(this, n);
239   ScratchDigits Ri(n);
240   {
241     // First iteration unrolled and specialized.
242     // We might not have n digits at the top of Q, so use temporary storage
243     // for Qi...
244     ScratchDigits Qi(n);
245     bz.D2n1n(Qi, Ri, Z, B);
246     if (should_terminate()) return;
247     // ...but there *will* be enough space for any non-zero result digits!
248     Qi.Normalize();
249     RWDigits target = Q + n * (t - 2);
250     DCHECK(Qi.len() <= target.len());
251     PutAt(target, Qi, target.len());
252   }
253   // Now loop over any remaining iterations.
254   for (int i = t - 3; i >= 0; i--) {
255     // 8b. If i > 0, set Z_(i-1) = [Ri, A_(i-1)].
256     // (De-duped with unrolled first iteration, hence reading A_(i).)
257     PutAt(Z + n, Ri, n);
258     PutAt(Z, A + n * i, n);
259     // 8a. Using algorithm D2n1n compute Qi, Ri such that Zi = B*Qi + Ri.
260     RWDigits Qi(Q, i * n, n);
261     bz.D2n1n(Qi, Ri, Z, B);
262     if (should_terminate()) return;
263   }
264   // 9. Return Q = [Q_(t-2), ..., Q_0] and R = R_0 * 2^(-sigma).
265 #if DEBUG
266   for (int i = 0; i < digit_shift; i++) {
267     DCHECK(Ri[i] == 0);
268   }
269 #endif
270   if (R.len() != 0) {
271     Digits Ri_part(Ri, digit_shift, Ri.len());
272     Ri_part.Normalize();
273     DCHECK(Ri_part.len() <= R.len());
274     RightShift(R, Ri_part, sigma);
275   }
276 }
277 
278 }  // namespace bigint
279 }  // namespace v8
280