• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (c) 2020, Google Inc.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
15 // An implementation of the NIST P-256 elliptic curve point multiplication.
16 // 256-bit Montgomery form for 64 and 32-bit. Field operations are generated by
17 // Fiat, which lives in //third_party/fiat.
18 
19 #include <ring-core/base.h>
20 
21 #include "../../limbs/limbs.h"
22 #include "../../limbs/limbs.inl"
23 
24 #include "p256_shared.h"
25 
26 #include "../../internal.h"
27 #include "./util.h"
28 
29 #if !defined(OPENSSL_USE_NISTZ256)
30 
31 #if defined(_MSC_VER) && !defined(__clang__)
32 // '=': conversion from 'int64_t' to 'int32_t', possible loss of data
33 #pragma warning(disable: 4242)
34 // '=': conversion from 'int32_t' to 'uint8_t', possible loss of data
35 #pragma warning(disable: 4244)
36 // 'initializing': conversion from 'size_t' to 'fiat_p256_limb_t'
37 #pragma warning(disable: 4267)
38 #endif
39 
40 #if defined(__GNUC__) || defined(__clang__)
41 #pragma GCC diagnostic ignored "-Wconversion"
42 #pragma GCC diagnostic ignored "-Wsign-conversion"
43 #endif
44 
45 #if defined(__GNUC__) && !defined(__clang__)
46 #pragma GCC diagnostic ignored "-Winline"
47 #endif
48 
49 #if defined(BORINGSSL_HAS_UINT128)
50 #if defined(__GNUC__)
51 #pragma GCC diagnostic ignored "-Wpedantic"
52 #endif
53 #include "../../../third_party/fiat/p256_64.h"
54 #elif defined(OPENSSL_64_BIT)
55 #include "../../../third_party/fiat/p256_64_msvc.h"
56 #else
57 #include "../../../third_party/fiat/p256_32.h"
58 #endif
59 
60 
61 // utility functions, handwritten
62 
#if defined(OPENSSL_64_BIT)
// 64-bit build: a field element is four 64-bit limbs, least-significant limb
// first.
#define FIAT_P256_NLIMBS 4
typedef uint64_t fiat_p256_limb_t;
typedef fiat_p256_limb_t fiat_p256_felem[FIAT_P256_NLIMBS];
// The field element 1 in Montgomery form, i.e. 2^256 mod p.
static const fiat_p256_felem fiat_p256_one = {
    0x1,
    0xffffffff00000000,
    0xffffffffffffffff,
    0xfffffffe,
};
#else  // !OPENSSL_64_BIT
// 32-bit build: a field element is eight 32-bit limbs, least-significant limb
// first.
#define FIAT_P256_NLIMBS 8
typedef uint32_t fiat_p256_limb_t;
typedef fiat_p256_limb_t fiat_p256_felem[FIAT_P256_NLIMBS];
// The field element 1 in Montgomery form, i.e. 2^256 mod p.
static const fiat_p256_felem fiat_p256_one = {
    0x1,        0x0,        0x0,        0xffffffff,
    0xffffffff, 0xffffffff, 0xfffffffe, 0x0,
};
#endif  // OPENSSL_64_BIT
76 
77 
// fiat_p256_nz returns the single limb that |fiat_p256_nonzero| produces for
// |in1|. Callers in this file only distinguish zero from non-zero.
static fiat_p256_limb_t fiat_p256_nz(
    const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
  fiat_p256_limb_t nonzero_flag;
  fiat_p256_nonzero(&nonzero_flag, in1);
  return nonzero_flag;
}
84 
// fiat_p256_copy copies the |FIAT_P256_NLIMBS| limbs of |in1| into |out|,
// lowest index first.
static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
                           const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
  size_t limb = 0;
  while (limb < FIAT_P256_NLIMBS) {
    out[limb] = in1[limb];
    limb++;
  }
}
91 
// fiat_p256_cmovznz sets |out| to |z| when |t| is zero and to |nz| otherwise.
// |t| may be any limb value; it is collapsed to 0 or 1 before being handed to
// |fiat_p256_selectznz|.
static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
                              fiat_p256_limb_t t,
                              const fiat_p256_limb_t z[FIAT_P256_NLIMBS],
                              const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) {
  const int pick_nz = (t != 0);
  fiat_p256_selectznz(out, pick_nz, z, nz);
}
98 
99 // Group operations
100 // ----------------
101 //
102 // Building on top of the field operations we have the operations on the
103 // elliptic curve group itself. Points on the curve are represented in Jacobian
104 // coordinates.
105 //
106 // Both operations were transcribed to Coq and proven to correspond to naive
107 // implementations using Affine coordinates, for all suitable fields.  In the
108 // Coq proofs, issues of constant-time execution and memory layout (aliasing)
109 // conventions were not considered. Specification of affine coordinates:
110 // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Spec/WeierstrassCurve.v#L28>
111 // As a sanity check, a proof that these points form a commutative group:
112 // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/AffineProofs.v#L33>
113 
114 // fiat_p256_point_double calculates 2*(x_in, y_in, z_in)
115 //
116 // The method is taken from:
117 //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
118 //
119 // Coq transcription and correctness proof:
120 // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
121 // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
122 //
123 // Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
124 // while x_out == y_in is not (maybe this works, but it's not tested).
fiat_p256_point_double(fiat_p256_felem x_out,fiat_p256_felem y_out,fiat_p256_felem z_out,const fiat_p256_felem x_in,const fiat_p256_felem y_in,const fiat_p256_felem z_in)125 static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out,
126                                    fiat_p256_felem z_out,
127                                    const fiat_p256_felem x_in,
128                                    const fiat_p256_felem y_in,
129                                    const fiat_p256_felem z_in) {
130   fiat_p256_felem delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
131   // delta = z^2
132   fiat_p256_square(delta, z_in);
133   // gamma = y^2
134   fiat_p256_square(gamma, y_in);
135   // beta = x*gamma
136   fiat_p256_mul(beta, x_in, gamma);
137 
138   // alpha = 3*(x-delta)*(x+delta)
139   fiat_p256_sub(ftmp, x_in, delta);
140   fiat_p256_add(ftmp2, x_in, delta);
141 
142   fiat_p256_add(tmptmp, ftmp2, ftmp2);
143   fiat_p256_add(ftmp2, ftmp2, tmptmp);
144   fiat_p256_mul(alpha, ftmp, ftmp2);
145 
146   // x' = alpha^2 - 8*beta
147   fiat_p256_square(x_out, alpha);
148   fiat_p256_add(fourbeta, beta, beta);
149   fiat_p256_add(fourbeta, fourbeta, fourbeta);
150   fiat_p256_add(tmptmp, fourbeta, fourbeta);
151   fiat_p256_sub(x_out, x_out, tmptmp);
152 
153   // z' = (y + z)^2 - gamma - delta
154   fiat_p256_add(delta, gamma, delta);
155   fiat_p256_add(ftmp, y_in, z_in);
156   fiat_p256_square(z_out, ftmp);
157   fiat_p256_sub(z_out, z_out, delta);
158 
159   // y' = alpha*(4*beta - x') - 8*gamma^2
160   fiat_p256_sub(y_out, fourbeta, x_out);
161   fiat_p256_add(gamma, gamma, gamma);
162   fiat_p256_square(gamma, gamma);
163   fiat_p256_mul(y_out, alpha, y_out);
164   fiat_p256_add(gamma, gamma, gamma);
165   fiat_p256_sub(y_out, y_out, gamma);
166 }
167 
168 // fiat_p256_point_add calculates (x1, y1, z1) + (x2, y2, z2)
169 //
170 // The method is taken from:
171 //   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
172 // adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
173 //
174 // Coq transcription and correctness proof:
175 // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135>
176 // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205>
177 //
178 // This function includes a branch for checking whether the two input points
179 // are equal, (while not equal to the point at infinity). This case never
180 // happens during single point multiplication, so there is no timing leak for
181 // ECDH or ECDSA signing.
fiat_p256_point_add(fiat_p256_felem x3,fiat_p256_felem y3,fiat_p256_felem z3,const fiat_p256_felem x1,const fiat_p256_felem y1,const fiat_p256_felem z1,const int mixed,const fiat_p256_felem x2,const fiat_p256_felem y2,const fiat_p256_felem z2)182 static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
183                                 fiat_p256_felem z3, const fiat_p256_felem x1,
184                                 const fiat_p256_felem y1,
185                                 const fiat_p256_felem z1, const int mixed,
186                                 const fiat_p256_felem x2,
187                                 const fiat_p256_felem y2,
188                                 const fiat_p256_felem z2) {
189   fiat_p256_felem x_out, y_out, z_out;
190   fiat_p256_limb_t z1nz = fiat_p256_nz(z1);
191   fiat_p256_limb_t z2nz = fiat_p256_nz(z2);
192 
193   // z1z1 = z1z1 = z1**2
194   fiat_p256_felem z1z1;
195   fiat_p256_square(z1z1, z1);
196 
197   fiat_p256_felem u1, s1, two_z1z2;
198   if (!mixed) {
199     // z2z2 = z2**2
200     fiat_p256_felem z2z2;
201     fiat_p256_square(z2z2, z2);
202 
203     // u1 = x1*z2z2
204     fiat_p256_mul(u1, x1, z2z2);
205 
206     // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
207     fiat_p256_add(two_z1z2, z1, z2);
208     fiat_p256_square(two_z1z2, two_z1z2);
209     fiat_p256_sub(two_z1z2, two_z1z2, z1z1);
210     fiat_p256_sub(two_z1z2, two_z1z2, z2z2);
211 
212     // s1 = y1 * z2**3
213     fiat_p256_mul(s1, z2, z2z2);
214     fiat_p256_mul(s1, s1, y1);
215   } else {
216     // We'll assume z2 = 1 (special case z2 = 0 is handled later).
217 
218     // u1 = x1*z2z2
219     fiat_p256_copy(u1, x1);
220     // two_z1z2 = 2z1z2
221     fiat_p256_add(two_z1z2, z1, z1);
222     // s1 = y1 * z2**3
223     fiat_p256_copy(s1, y1);
224   }
225 
226   // u2 = x2*z1z1
227   fiat_p256_felem u2;
228   fiat_p256_mul(u2, x2, z1z1);
229 
230   // h = u2 - u1
231   fiat_p256_felem h;
232   fiat_p256_sub(h, u2, u1);
233 
234   fiat_p256_limb_t xneq = fiat_p256_nz(h);
235 
236   // z_out = two_z1z2 * h
237   fiat_p256_mul(z_out, h, two_z1z2);
238 
239   // z1z1z1 = z1 * z1z1
240   fiat_p256_felem z1z1z1;
241   fiat_p256_mul(z1z1z1, z1, z1z1);
242 
243   // s2 = y2 * z1**3
244   fiat_p256_felem s2;
245   fiat_p256_mul(s2, y2, z1z1z1);
246 
247   // r = (s2 - s1)*2
248   fiat_p256_felem r;
249   fiat_p256_sub(r, s2, s1);
250   fiat_p256_add(r, r, r);
251 
252   fiat_p256_limb_t yneq = fiat_p256_nz(r);
253 
254   fiat_p256_limb_t is_nontrivial_double = constant_time_is_zero_w(xneq | yneq) &
255                                           ~constant_time_is_zero_w(z1nz) &
256                                           ~constant_time_is_zero_w(z2nz);
257   if (constant_time_declassify_w(is_nontrivial_double)) {
258     fiat_p256_point_double(x3, y3, z3, x1, y1, z1);
259     return;
260   }
261 
262   // I = (2h)**2
263   fiat_p256_felem i;
264   fiat_p256_add(i, h, h);
265   fiat_p256_square(i, i);
266 
267   // J = h * I
268   fiat_p256_felem j;
269   fiat_p256_mul(j, h, i);
270 
271   // V = U1 * I
272   fiat_p256_felem v;
273   fiat_p256_mul(v, u1, i);
274 
275   // x_out = r**2 - J - 2V
276   fiat_p256_square(x_out, r);
277   fiat_p256_sub(x_out, x_out, j);
278   fiat_p256_sub(x_out, x_out, v);
279   fiat_p256_sub(x_out, x_out, v);
280 
281   // y_out = r(V-x_out) - 2 * s1 * J
282   fiat_p256_sub(y_out, v, x_out);
283   fiat_p256_mul(y_out, y_out, r);
284   fiat_p256_felem s1j;
285   fiat_p256_mul(s1j, s1, j);
286   fiat_p256_sub(y_out, y_out, s1j);
287   fiat_p256_sub(y_out, y_out, s1j);
288 
289   fiat_p256_cmovznz(x_out, z1nz, x2, x_out);
290   fiat_p256_cmovznz(x3, z2nz, x1, x_out);
291   fiat_p256_cmovznz(y_out, z1nz, y2, y_out);
292   fiat_p256_cmovznz(y3, z2nz, y1, y_out);
293   fiat_p256_cmovznz(z_out, z1nz, z2, z_out);
294   fiat_p256_cmovznz(z3, z2nz, z1, z_out);
295 }
296 
297 #include "./p256_table.h"
298 
299 // fiat_p256_select_point_affine selects the |idx-1|th point from a
300 // precomputation table and copies it to out. If |idx| is zero, the output is
301 // the point at infinity.
fiat_p256_select_point_affine(const fiat_p256_limb_t idx,size_t size,const fiat_p256_felem pre_comp[][2],fiat_p256_felem out[3])302 static void fiat_p256_select_point_affine(
303     const fiat_p256_limb_t idx, size_t size,
304     const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) {
305   OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
306   for (size_t i = 0; i < size; i++) {
307     fiat_p256_limb_t mismatch = i ^ (idx - 1);
308     fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
309     fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
310   }
311   fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one);
312 }
313 
314 // fiat_p256_select_point selects the |idx|th point from a precomputation table
315 // and copies it to out.
fiat_p256_select_point(const fiat_p256_limb_t idx,size_t size,const fiat_p256_felem pre_comp[][3],fiat_p256_felem out[3])316 static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size,
317                                    const fiat_p256_felem pre_comp[/*size*/][3],
318                                    fiat_p256_felem out[3]) {
319   OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
320   for (size_t i = 0; i < size; i++) {
321     fiat_p256_limb_t mismatch = i ^ idx;
322     fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
323     fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
324     fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]);
325   }
326 }
327 
328 // fiat_p256_get_bit returns the |i|th bit in |in|
fiat_p256_get_bit(const Limb in[P256_LIMBS],int i)329 static crypto_word_t fiat_p256_get_bit(const Limb in[P256_LIMBS], int i) {
330   if (i < 0 || i >= 256) {
331     return 0;
332   }
333 #if defined(OPENSSL_64_BIT)
334   OPENSSL_STATIC_ASSERT(sizeof(Limb) == 8, "BN_ULONG was not 64-bit");
335   return (in[i >> 6] >> (i & 63)) & 1;
336 #else
337   OPENSSL_STATIC_ASSERT(sizeof(Limb) == 4, "BN_ULONG was not 32-bit");
338   return (in[i >> 5] >> (i & 31)) & 1;
339 #endif
340 }
341 
// p256_point_mul sets |r| to |scalar| times the point with affine coordinates
// (|p_x|, |p_y|), in Jacobian coordinates. It builds a table of the multiples
// 0..16 of the input point and then walks the scalar from the top bit down,
// doubling every round and adding a table entry every fifth round.
void p256_point_mul(P256_POINT *r, const Limb scalar[P256_LIMBS],
                    const Limb p_x[P256_LIMBS], const Limb p_y[P256_LIMBS]) {
  debug_assert_nonsecret(r != NULL);
  debug_assert_nonsecret(scalar != NULL);
  debug_assert_nonsecret(p_x != NULL);
  debug_assert_nonsecret(p_y != NULL);

  // multiples[k] holds k*P. Entry 0 stays all-zero (the point at infinity) and
  // entry 1 is the input point with z = 1.
  fiat_p256_felem multiples[17][3];
  OPENSSL_memset(&multiples, 0, sizeof(multiples));
  limbs_copy(multiples[1][0], p_x, P256_LIMBS);
  limbs_copy(multiples[1][1], p_y, P256_LIMBS);
  limbs_copy(multiples[1][2], fiat_p256_one, P256_LIMBS);

  for (size_t k = 2; k <= 16; ++k) {
    if ((k & 1) == 0) {
      // Even multiple: k*P = 2 * ((k/2)*P).
      fiat_p256_point_double(multiples[k][0], multiples[k][1], multiples[k][2],
                             multiples[k / 2][0], multiples[k / 2][1],
                             multiples[k / 2][2]);
    } else {
      // Odd multiple: k*P = P + (k-1)*P.
      fiat_p256_point_add(multiples[k][0], multiples[k][1], multiples[k][2],
                          multiples[1][0], multiples[1][1], multiples[1][2],
                          0 /* mixed */, multiples[k - 1][0],
                          multiples[k - 1][1], multiples[k - 1][2]);
    }
  }

  // The accumulator starts as the point at infinity.
  fiat_p256_felem acc[3] = {{0}, {0}, {0}};
  fiat_p256_felem negated_y;
  fiat_p256_felem addend[3];

  // While the accumulator is still the point at infinity, the doubling is
  // skipped and the first addition becomes a plain copy, which saves two
  // point operations.
  int first_round = 1;

  // Visit bit positions 255 down to 0.
  size_t i = 256;
  while (i-- > 0) {
    if (!first_round) {
      fiat_p256_point_double(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2]);
    }

    // A table entry is only folded in at positions that are multiples of five.
    if (i % 5 != 0) {
      continue;
    }

    // Gather bits i+4 .. i-1, most significant first, into a six-bit window.
    // Positions outside the scalar read as zero.
    crypto_word_t window = 0;
    for (size_t k = 5; k-- > 0;) {
      window = (window << 1) | fiat_p256_get_bit(scalar, i + k);
    }
    window = (window << 1) | fiat_p256_get_bit(scalar, i - 1);

    crypto_word_t is_negative, magnitude;
    recode_scalar_bits(&is_negative, &magnitude, window);

    // Select the point to add or subtract, in constant time.
    fiat_p256_select_point((fiat_p256_limb_t)magnitude, 17,
      RING_CORE_POINTLESS_ARRAY_CONST_CAST((const fiat_p256_felem(*)[3]))multiples,
      addend);
    // (X, -Y, Z) is the negative point; use it when the sign is set.
    fiat_p256_opp(negated_y, addend[1]);
    fiat_p256_cmovznz(addend[1], (fiat_p256_limb_t)is_negative, addend[1],
                      negated_y);

    if (first_round) {
      fiat_p256_copy(acc[0], addend[0]);
      fiat_p256_copy(acc[1], addend[1]);
      fiat_p256_copy(acc[2], addend[2]);
      first_round = 0;
    } else {
      fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                          0 /* mixed */, addend[0], addend[1], addend[2]);
    }
  }

  limbs_copy(r->X, acc[0], P256_LIMBS);
  limbs_copy(r->Y, acc[1], P256_LIMBS);
  limbs_copy(r->Z, acc[2], P256_LIMBS);
}
414 
// p256_point_mul_base sets |r| to |scalar| times the point whose precomputed
// multiples are stored in |fiat_p256_g_pre_comp|, in Jacobian coordinates. Each
// of the 32 rounds doubles the accumulator once and adds two table entries,
// each chosen by four scalar bits spaced 64 positions apart.
void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) {
  // The accumulator starts as the point at infinity.
  fiat_p256_felem acc[3] = {{0}, {0}, {0}};
  fiat_p256_felem addend[3];

  // While the accumulator is still the point at infinity, the doubling is
  // skipped and the first addition becomes a plain copy, which saves two
  // point operations.
  int first_round = 1;

  // Visit i = 31 down to 0.
  size_t i = 32;
  while (i-- > 0) {
    if (!first_round) {
      fiat_p256_point_double(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2]);
    }

    // First, the bits 32 positions up: i+224, i+160, i+96, i+32 (most
    // significant first).
    crypto_word_t upper = 0;
    for (size_t k = 4; k-- > 0;) {
      upper = (upper << 1) | fiat_p256_get_bit(scalar, i + 32 + 64 * k);
    }
    // Select the point to add, in constant time.
    fiat_p256_select_point_affine((fiat_p256_limb_t)upper, 15,
                                  fiat_p256_g_pre_comp[1], addend);

    if (first_round) {
      fiat_p256_copy(acc[0], addend[0]);
      fiat_p256_copy(acc[1], addend[1]);
      fiat_p256_copy(acc[2], addend[2]);
      first_round = 0;
    } else {
      fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                          1 /* mixed */, addend[0], addend[1], addend[2]);
    }

    // Second, the bits at the current position: i+192, i+128, i+64, i.
    crypto_word_t lower = 0;
    for (size_t k = 4; k-- > 0;) {
      lower = (lower << 1) | fiat_p256_get_bit(scalar, i + 64 * k);
    }
    // Select the point to add, in constant time.
    fiat_p256_select_point_affine((fiat_p256_limb_t)lower, 15,
                                  fiat_p256_g_pre_comp[0], addend);
    fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                        1 /* mixed */, addend[0], addend[1], addend[2]);
  }

  limbs_copy(r->X, acc[0], P256_LIMBS);
  limbs_copy(r->Y, acc[1], P256_LIMBS);
  limbs_copy(r->Z, acc[2], P256_LIMBS);
}
460 
// p256_mul_mont sets |r| to the field product of |a| and |b| by forwarding to
// the Fiat-generated |fiat_p256_mul|.
void p256_mul_mont(Limb r[P256_LIMBS], const Limb a[P256_LIMBS],
                   const Limb b[P256_LIMBS]) {
  fiat_p256_mul(r, a, b);
}
465 
// p256_sqr_mont sets |r| to the field square of |a| by forwarding to the
// Fiat-generated |fiat_p256_square|.
void p256_sqr_mont(Limb r[P256_LIMBS],
                   const Limb a[P256_LIMBS]) {
  fiat_p256_square(r, a);
}
469 
// p256_point_add sets |r| to |a| + |b|, all in Jacobian coordinates, using the
// general (non-mixed) form of |fiat_p256_point_add|.
void p256_point_add(P256_POINT *r, const P256_POINT *a, const P256_POINT *b) {
  fiat_p256_point_add(r->X, r->Y, r->Z, a->X, a->Y, a->Z, 0 /* mixed */, b->X,
                      b->Y, b->Z);
}
476 
// p256_point_double sets |r| to 2 * |a| in Jacobian coordinates by forwarding
// to |fiat_p256_point_double|.
void p256_point_double(P256_POINT *r, const P256_POINT *a) {
  fiat_p256_point_double(r->X, r->Y, r->Z, a->X, a->Y, a->Z);
}
481 
482 // For testing only.
p256_point_add_affine(P256_POINT * r,const P256_POINT * a,const BN_ULONG b[P256_LIMBS * 2])483 void p256_point_add_affine(P256_POINT *r, const P256_POINT *a,
484                                const BN_ULONG b[P256_LIMBS * 2]) {
485   const Limb *b_x = &b[0];
486   const Limb *b_y = &b[P256_LIMBS];
487   fiat_p256_felem b_z = {0};
488   crypto_word_t b_is_inf = constant_time_select_w(
489       LIMBS_are_zero(b_x, P256_LIMBS), LIMBS_are_zero(b_y, P256_LIMBS), 0);
490   fiat_p256_cmovznz(b_z, constant_time_is_zero_w(b_is_inf), b_z, fiat_p256_one);
491   fiat_p256_point_add(r->X, r->Y, r->Z,
492                       a->X, a->Y, a->Z,
493                       1,
494                       b_x, b_y, b_z);
495 }
496 
497 #endif
498