• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (c) 2020, Google Inc.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
15 // An implementation of the NIST P-256 elliptic curve point multiplication.
16 // 256-bit Montgomery form for 64 and 32-bit. Field operations are generated by
17 // Fiat, which lives in //third_party/fiat.
18 
19 #include <openssl/base.h>
20 
21 #include <openssl/bn.h>
22 #include <openssl/ec.h>
23 #include <openssl/err.h>
24 #include <openssl/mem.h>
25 #include <openssl/type_check.h>
26 
27 #include <assert.h>
28 #include <string.h>
29 
30 #include "../../internal.h"
31 #include "../delocate.h"
32 #include "./internal.h"
33 
34 
35 // MSVC does not implement uint128_t, and crashes with intrinsics
36 #if defined(BORINGSSL_HAS_UINT128)
37 #define BORINGSSL_NISTP256_64BIT 1
38 #include "../../../third_party/fiat/p256_64.h"
39 #else
40 #include "../../../third_party/fiat/p256_32.h"
41 #endif
42 
43 
44 // utility functions, handwritten
45 
// Limb layout for field elements. The 64-bit fiat backend (selected when
// |BORINGSSL_NISTP256_64BIT| is defined) uses four 64-bit limbs; otherwise the
// 32-bit backend uses eight 32-bit limbs. Either way a field element occupies
// 256 bits.
#if defined(BORINGSSL_NISTP256_64BIT)
#define FIAT_P256_NLIMBS 4
typedef uint64_t fiat_p256_limb_t;
typedef uint64_t fiat_p256_felem[FIAT_P256_NLIMBS];
// fiat_p256_one, read with the least-significant limb first, is
// 2^224 - 2^192 - 2^96 + 1 = 2^256 mod p, i.e. the value one in the Montgomery
// representation used by this file.
static const fiat_p256_felem fiat_p256_one = {0x1, 0xffffffff00000000,
                                              0xffffffffffffffff, 0xfffffffe};
#else  // 64BIT; else 32BIT
#define FIAT_P256_NLIMBS 8
typedef uint32_t fiat_p256_limb_t;
typedef uint32_t fiat_p256_felem[FIAT_P256_NLIMBS];
// Same value as the 64-bit constant, split into 32-bit limbs.
static const fiat_p256_felem fiat_p256_one = {
    0x1, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0x0};
#endif  // 64BIT
59 
60 
// fiat_p256_nz is a value-returning wrapper around |fiat_p256_nonzero|: it
// returns the word that |fiat_p256_nonzero| writes for |in1|. Callers in this
// file treat a zero result as "|in1| is zero".
static fiat_p256_limb_t fiat_p256_nz(
    const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
  fiat_p256_limb_t nonzero_word;
  fiat_p256_nonzero(&nonzero_word, in1);
  return nonzero_word;
}
67 
// fiat_p256_copy copies the |FIAT_P256_NLIMBS| limbs of |in1| into |out|,
// lowest index first.
static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
                           const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
  size_t limb = 0;
  while (limb < FIAT_P256_NLIMBS) {
    out[limb] = in1[limb];
    limb++;
  }
}
74 
// fiat_p256_cmovznz is a conditional move built on |fiat_p256_selectznz|. The
// full-width condition |t| is first collapsed to 0 or 1; |out| then receives
// |z| when |t| is zero and |nz| when |t| is non-zero.
static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
                              fiat_p256_limb_t t,
                              const fiat_p256_limb_t z[FIAT_P256_NLIMBS],
                              const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) {
  fiat_p256_selectznz(out, t != 0, z, nz);
}
81 
// fiat_p256_from_generic loads the byte representation stored in the generic
// |EC_FELEM| |in| into the fiat limb representation |out|.
static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) {
  fiat_p256_from_bytes(out, &in->bytes[0]);
}
85 
// fiat_p256_to_generic serializes the fiat field element |in| into the byte
// representation of the generic |EC_FELEM| |out|.
static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) {
  // |fiat_p256_to_bytes| only writes 256 / 8 bytes. The assertion checks that
  // this equals 256 bits rounded up to whole |BN_ULONG| words, so no trailing
  // bytes of those words are left unwritten.
  OPENSSL_STATIC_ASSERT(
      sizeof(BN_ULONG) * ((256 + BN_BITS2 - 1) / BN_BITS2) == 256 / 8,
      "fiat_p256_to_bytes leaves bytes uninitialized");
  fiat_p256_to_bytes(&out->bytes[0], in);
}
94 
95 // fiat_p256_inv_square calculates |out| = |in|^{-2}
96 //
97 // Based on Fermat's Little Theorem:
98 //   a^p = a (mod p)
99 //   a^{p-1} = 1 (mod p)
100 //   a^{p-3} = a^{-2} (mod p)
fiat_p256_inv_square(fiat_p256_felem out,const fiat_p256_felem in)101 static void fiat_p256_inv_square(fiat_p256_felem out,
102                                  const fiat_p256_felem in) {
103   // This implements the addition chain described in
104   // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
105   fiat_p256_felem x2, x3, x6, x12, x15, x30, x32;
106   fiat_p256_square(x2, in);   // 2^2 - 2^1
107   fiat_p256_mul(x2, x2, in);  // 2^2 - 2^0
108 
109   fiat_p256_square(x3, x2);   // 2^3 - 2^1
110   fiat_p256_mul(x3, x3, in);  // 2^3 - 2^0
111 
112   fiat_p256_square(x6, x3);
113   for (int i = 1; i < 3; i++) {
114     fiat_p256_square(x6, x6);
115   }                           // 2^6 - 2^3
116   fiat_p256_mul(x6, x6, x3);  // 2^6 - 2^0
117 
118   fiat_p256_square(x12, x6);
119   for (int i = 1; i < 6; i++) {
120     fiat_p256_square(x12, x12);
121   }                             // 2^12 - 2^6
122   fiat_p256_mul(x12, x12, x6);  // 2^12 - 2^0
123 
124   fiat_p256_square(x15, x12);
125   for (int i = 1; i < 3; i++) {
126     fiat_p256_square(x15, x15);
127   }                             // 2^15 - 2^3
128   fiat_p256_mul(x15, x15, x3);  // 2^15 - 2^0
129 
130   fiat_p256_square(x30, x15);
131   for (int i = 1; i < 15; i++) {
132     fiat_p256_square(x30, x30);
133   }                              // 2^30 - 2^15
134   fiat_p256_mul(x30, x30, x15);  // 2^30 - 2^0
135 
136   fiat_p256_square(x32, x30);
137   fiat_p256_square(x32, x32);   // 2^32 - 2^2
138   fiat_p256_mul(x32, x32, x2);  // 2^32 - 2^0
139 
140   fiat_p256_felem ret;
141   fiat_p256_square(ret, x32);
142   for (int i = 1; i < 31 + 1; i++) {
143     fiat_p256_square(ret, ret);
144   }                             // 2^64 - 2^32
145   fiat_p256_mul(ret, ret, in);  // 2^64 - 2^32 + 2^0
146 
147   for (int i = 0; i < 96 + 32; i++) {
148     fiat_p256_square(ret, ret);
149   }                              // 2^192 - 2^160 + 2^128
150   fiat_p256_mul(ret, ret, x32);  // 2^192 - 2^160 + 2^128 + 2^32 - 2^0
151 
152   for (int i = 0; i < 32; i++) {
153     fiat_p256_square(ret, ret);
154   }                              // 2^224 - 2^192 + 2^160 + 2^64 - 2^32
155   fiat_p256_mul(ret, ret, x32);  // 2^224 - 2^192 + 2^160 + 2^64 - 2^0
156 
157   for (int i = 0; i < 30; i++) {
158     fiat_p256_square(ret, ret);
159   }                              // 2^254 - 2^222 + 2^190 + 2^94 - 2^30
160   fiat_p256_mul(ret, ret, x30);  // 2^254 - 2^222 + 2^190 + 2^94 - 2^0
161 
162   fiat_p256_square(ret, ret);
163   fiat_p256_square(out, ret);  // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
164 }
165 
166 // Group operations
167 // ----------------
168 //
169 // Building on top of the field operations we have the operations on the
170 // elliptic curve group itself. Points on the curve are represented in Jacobian
171 // coordinates.
172 //
173 // Both operations were transcribed to Coq and proven to correspond to naive
174 // implementations using Affine coordinates, for all suitable fields.  In the
175 // Coq proofs, issues of constant-time execution and memory layout (aliasing)
176 // conventions were not considered. Specification of affine coordinates:
177 // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Spec/WeierstrassCurve.v#L28>
178 // As a sanity check, a proof that these points form a commutative group:
179 // <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/AffineProofs.v#L33>
180 
// fiat_p256_point_double calculates 2*(x_in, y_in, z_in), where the input and
// the output are points in Jacobian coordinates.
//
// The method is taken from:
//   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
//
// Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
// while x_out == y_in is not (maybe this works, but it's not tested).
//
// Several temporaries are reused for different quantities as the computation
// proceeds, and outputs are written while inputs are still needed by later
// steps of other coordinates. The statement order below therefore matters.
static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out,
                                   fiat_p256_felem z_out,
                                   const fiat_p256_felem x_in,
                                   const fiat_p256_felem y_in,
                                   const fiat_p256_felem z_in) {
  fiat_p256_felem delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
  // delta = z^2
  fiat_p256_square(delta, z_in);
  // gamma = y^2
  fiat_p256_square(gamma, y_in);
  // beta = x*gamma
  fiat_p256_mul(beta, x_in, gamma);

  // alpha = 3*(x-delta)*(x+delta)
  // ftmp = x - delta, ftmp2 = x + delta
  fiat_p256_sub(ftmp, x_in, delta);
  fiat_p256_add(ftmp2, x_in, delta);

  // tmptmp = 2*(x+delta), then ftmp2 = 3*(x+delta)
  fiat_p256_add(tmptmp, ftmp2, ftmp2);
  fiat_p256_add(ftmp2, ftmp2, tmptmp);
  fiat_p256_mul(alpha, ftmp, ftmp2);

  // x' = alpha^2 - 8*beta
  // |fourbeta| passes through 2*beta before reaching 4*beta; |tmptmp| is
  // reused to hold 8*beta. |x_in| is not read after this point.
  fiat_p256_square(x_out, alpha);
  fiat_p256_add(fourbeta, beta, beta);
  fiat_p256_add(fourbeta, fourbeta, fourbeta);
  fiat_p256_add(tmptmp, fourbeta, fourbeta);
  fiat_p256_sub(x_out, x_out, tmptmp);

  // z' = (y + z)^2 - gamma - delta
  // |delta| is overwritten with gamma + delta so a single subtraction suffices.
  fiat_p256_add(delta, gamma, delta);
  fiat_p256_add(ftmp, y_in, z_in);
  fiat_p256_square(z_out, ftmp);
  fiat_p256_sub(z_out, z_out, delta);

  // y' = alpha*(4*beta - x') - 8*gamma^2
  // |gamma| becomes 2*gamma, then (2*gamma)^2 = 4*gamma^2, then 8*gamma^2.
  fiat_p256_sub(y_out, fourbeta, x_out);
  fiat_p256_add(gamma, gamma, gamma);
  fiat_p256_square(gamma, gamma);
  fiat_p256_mul(y_out, alpha, y_out);
  fiat_p256_add(gamma, gamma, gamma);
  fiat_p256_sub(y_out, y_out, gamma);
}
234 
// fiat_p256_point_add calculates (x1, y1, z1) + (x2, y2, z2) and writes the
// sum to (x3, y3, z3). All points are in Jacobian coordinates.
//
// The method is taken from:
//   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205>
//
// If |mixed| is non-zero, the second point is treated as having z2 = 1 in the
// arithmetic (the multiplications by powers of z2 are skipped); |z2| itself is
// still inspected to detect the point at infinity.
//
// The general-case result is computed into local temporaries and only moved
// into (x3, y3, z3) at the very end, so callers in this file pass the same
// buffers for (x3, y3, z3) and (x1, y1, z1).
//
// This function includes a branch for checking whether the two input points
// are equal, (while not equal to the point at infinity). This case never
// happens during single point multiplication, so there is no timing leak for
// ECDH or ECDSA signing.
static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
                                fiat_p256_felem z3, const fiat_p256_felem x1,
                                const fiat_p256_felem y1,
                                const fiat_p256_felem z1, const int mixed,
                                const fiat_p256_felem x2,
                                const fiat_p256_felem y2,
                                const fiat_p256_felem z2) {
  fiat_p256_felem x_out, y_out, z_out;
  // Record up front whether either input has z = 0 (is the point at
  // infinity); the values are consumed by the selections at the end.
  fiat_p256_limb_t z1nz = fiat_p256_nz(z1);
  fiat_p256_limb_t z2nz = fiat_p256_nz(z2);

  // z1z1 = z1**2
  fiat_p256_felem z1z1;
  fiat_p256_square(z1z1, z1);

  fiat_p256_felem u1, s1, two_z1z2;
  if (!mixed) {
    // z2z2 = z2**2
    fiat_p256_felem z2z2;
    fiat_p256_square(z2z2, z2);

    // u1 = x1*z2z2
    fiat_p256_mul(u1, x1, z2z2);

    // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
    fiat_p256_add(two_z1z2, z1, z2);
    fiat_p256_square(two_z1z2, two_z1z2);
    fiat_p256_sub(two_z1z2, two_z1z2, z1z1);
    fiat_p256_sub(two_z1z2, two_z1z2, z2z2);

    // s1 = y1 * z2**3
    fiat_p256_mul(s1, z2, z2z2);
    fiat_p256_mul(s1, s1, y1);
  } else {
    // We'll assume z2 = 1 (special case z2 = 0 is handled later).

    // u1 = x1*z2z2
    fiat_p256_copy(u1, x1);
    // two_z1z2 = 2z1z2
    fiat_p256_add(two_z1z2, z1, z1);
    // s1 = y1 * z2**3
    fiat_p256_copy(s1, y1);
  }

  // u2 = x2*z1z1
  fiat_p256_felem u2;
  fiat_p256_mul(u2, x2, z1z1);

  // h = u2 - u1
  fiat_p256_felem h;
  fiat_p256_sub(h, u2, u1);

  // xneq is zero exactly when h is zero, i.e. when u1 == u2.
  fiat_p256_limb_t xneq = fiat_p256_nz(h);

  // z_out = two_z1z2 * h
  fiat_p256_mul(z_out, h, two_z1z2);

  // z1z1z1 = z1 * z1z1
  fiat_p256_felem z1z1z1;
  fiat_p256_mul(z1z1z1, z1, z1z1);

  // s2 = y2 * z1**3
  fiat_p256_felem s2;
  fiat_p256_mul(s2, y2, z1z1z1);

  // r = (s2 - s1)*2
  fiat_p256_felem r;
  fiat_p256_sub(r, s2, s1);
  fiat_p256_add(r, r, r);

  // yneq is zero exactly when r is zero.
  fiat_p256_limb_t yneq = fiat_p256_nz(r);

  // The addition formula does not cover adding a point to itself. That case is
  // detected here as: h and r both zero while neither input has z = 0.
  // NOTE(review): this relies on |constant_time_is_zero_w| returning an
  // all-ones mask for a zero argument and zero otherwise -- confirm against
  // its definition.
  fiat_p256_limb_t is_nontrivial_double = constant_time_is_zero_w(xneq | yneq) &
                                          ~constant_time_is_zero_w(z1nz) &
                                          ~constant_time_is_zero_w(z2nz);
  if (is_nontrivial_double) {
    fiat_p256_point_double(x3, y3, z3, x1, y1, z1);
    return;
  }

  // I = (2h)**2
  fiat_p256_felem i;
  fiat_p256_add(i, h, h);
  fiat_p256_square(i, i);

  // J = h * I
  fiat_p256_felem j;
  fiat_p256_mul(j, h, i);

  // V = U1 * I
  fiat_p256_felem v;
  fiat_p256_mul(v, u1, i);

  // x_out = r**2 - J - 2V
  fiat_p256_square(x_out, r);
  fiat_p256_sub(x_out, x_out, j);
  fiat_p256_sub(x_out, x_out, v);
  fiat_p256_sub(x_out, x_out, v);

  // y_out = r(V-x_out) - 2 * s1 * J
  fiat_p256_sub(y_out, v, x_out);
  fiat_p256_mul(y_out, y_out, r);
  fiat_p256_felem s1j;
  fiat_p256_mul(s1j, s1, j);
  fiat_p256_sub(y_out, y_out, s1j);
  fiat_p256_sub(y_out, y_out, s1j);

  // Handle inputs at infinity without branching. For each coordinate, the
  // first selection replaces the computed value with the second point's
  // coordinate when z1 is zero; the second selection writes the first point's
  // coordinate to the output when z2 is zero, and the (possibly replaced)
  // computed value otherwise.
  fiat_p256_cmovznz(x_out, z1nz, x2, x_out);
  fiat_p256_cmovznz(x3, z2nz, x1, x_out);
  fiat_p256_cmovznz(y_out, z1nz, y2, y_out);
  fiat_p256_cmovznz(y3, z2nz, y1, y_out);
  fiat_p256_cmovznz(z_out, z1nz, z2, z_out);
  fiat_p256_cmovznz(z3, z2nz, z1, z_out);
}
363 
364 #include "./p256_table.h"
365 
366 // fiat_p256_select_point_affine selects the |idx-1|th point from a
367 // precomputation table and copies it to out. If |idx| is zero, the output is
368 // the point at infinity.
fiat_p256_select_point_affine(const fiat_p256_limb_t idx,size_t size,const fiat_p256_felem pre_comp[][2],fiat_p256_felem out[3])369 static void fiat_p256_select_point_affine(
370     const fiat_p256_limb_t idx, size_t size,
371     const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) {
372   OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
373   for (size_t i = 0; i < size; i++) {
374     fiat_p256_limb_t mismatch = i ^ (idx - 1);
375     fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
376     fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
377   }
378   fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one);
379 }
380 
381 // fiat_p256_select_point selects the |idx|th point from a precomputation table
382 // and copies it to out.
fiat_p256_select_point(const fiat_p256_limb_t idx,size_t size,const fiat_p256_felem pre_comp[][3],fiat_p256_felem out[3])383 static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size,
384                                    const fiat_p256_felem pre_comp[/*size*/][3],
385                                    fiat_p256_felem out[3]) {
386   OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
387   for (size_t i = 0; i < size; i++) {
388     fiat_p256_limb_t mismatch = i ^ idx;
389     fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
390     fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
391     fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]);
392   }
393 }
394 
395 // fiat_p256_get_bit returns the |i|th bit in |in|
fiat_p256_get_bit(const uint8_t * in,int i)396 static crypto_word_t fiat_p256_get_bit(const uint8_t *in, int i) {
397   if (i < 0 || i >= 256) {
398     return 0;
399   }
400   return (in[i >> 3] >> (i & 7)) & 1;
401 }
402 
403 // OPENSSL EC_METHOD FUNCTIONS
404 
405 // Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
406 // (X/Z^2, Y/Z^3).
ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP * group,const EC_RAW_POINT * point,EC_FELEM * x_out,EC_FELEM * y_out)407 static int ec_GFp_nistp256_point_get_affine_coordinates(
408     const EC_GROUP *group, const EC_RAW_POINT *point, EC_FELEM *x_out,
409     EC_FELEM *y_out) {
410   if (ec_GFp_simple_is_at_infinity(group, point)) {
411     OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
412     return 0;
413   }
414 
415   fiat_p256_felem z1, z2;
416   fiat_p256_from_generic(z1, &point->Z);
417   fiat_p256_inv_square(z2, z1);
418 
419   if (x_out != NULL) {
420     fiat_p256_felem x;
421     fiat_p256_from_generic(x, &point->X);
422     fiat_p256_mul(x, x, z2);
423     fiat_p256_to_generic(x_out, x);
424   }
425 
426   if (y_out != NULL) {
427     fiat_p256_felem y;
428     fiat_p256_from_generic(y, &point->Y);
429     fiat_p256_square(z2, z2);  // z^-4
430     fiat_p256_mul(y, y, z1);   // y * z
431     fiat_p256_mul(y, y, z2);   // y * z^-3
432     fiat_p256_to_generic(y_out, y);
433   }
434 
435   return 1;
436 }
437 
// ec_GFp_nistp256_add sets |r| to |a| + |b|. Both inputs are general Jacobian
// points, so the non-mixed form of |fiat_p256_point_add| is used.
static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_RAW_POINT *r,
                                const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
  // Index 0, 1, 2 hold X, Y, Z respectively.
  fiat_p256_felem lhs[3], rhs[3];
  fiat_p256_from_generic(lhs[0], &a->X);
  fiat_p256_from_generic(lhs[1], &a->Y);
  fiat_p256_from_generic(lhs[2], &a->Z);
  fiat_p256_from_generic(rhs[0], &b->X);
  fiat_p256_from_generic(rhs[1], &b->Y);
  fiat_p256_from_generic(rhs[2], &b->Z);

  // The sum is written back over |lhs|.
  fiat_p256_point_add(lhs[0], lhs[1], lhs[2], lhs[0], lhs[1], lhs[2],
                      0 /* both Jacobian */, rhs[0], rhs[1], rhs[2]);

  fiat_p256_to_generic(&r->X, lhs[0]);
  fiat_p256_to_generic(&r->Y, lhs[1]);
  fiat_p256_to_generic(&r->Z, lhs[2]);
}
453 
// ec_GFp_nistp256_dbl sets |r| to 2 * |a|.
static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
                                const EC_RAW_POINT *a) {
  // Index 0, 1, 2 hold X, Y, Z respectively.
  fiat_p256_felem pt[3];
  fiat_p256_from_generic(pt[0], &a->X);
  fiat_p256_from_generic(pt[1], &a->Y);
  fiat_p256_from_generic(pt[2], &a->Z);

  // Double in place; each output aliases its corresponding input.
  fiat_p256_point_double(pt[0], pt[1], pt[2], pt[0], pt[1], pt[2]);

  fiat_p256_to_generic(&r->X, pt[0]);
  fiat_p256_to_generic(&r->Y, pt[1]);
  fiat_p256_to_generic(&r->Z, pt[2]);
}
465 
// ec_GFp_nistp256_point_mul sets |r| to |scalar| * |p|.
//
// A table of the multiples 0*|p| .. 16*|p| is built first. The scalar is then
// consumed from the most-significant bit down: the accumulator is doubled once
// per bit, and at every bit position that is a multiple of five a six-bit
// window (bits i+4 .. i-1) is recoded by |ec_GFp_nistp_recode_scalar_bits| into
// a sign and a table index, and the selected multiple is added or subtracted.
// The table entry is fetched with |fiat_p256_select_point| and negated with a
// conditional move, so neither step indexes or branches on scalar bits.
static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,
                                      const EC_RAW_POINT *p,
                                      const EC_SCALAR *scalar) {
  // multiples[k] = k * |p|. Entry zero stays all-zero (the point at infinity).
  fiat_p256_felem multiples[17][3];
  OPENSSL_memset(&multiples, 0, sizeof(multiples));
  fiat_p256_from_generic(multiples[1][0], &p->X);
  fiat_p256_from_generic(multiples[1][1], &p->Y);
  fiat_p256_from_generic(multiples[1][2], &p->Z);
  for (size_t k = 2; k <= 16; ++k) {
    if ((k & 1) == 0) {
      // Even multiples come from doubling entry k/2.
      fiat_p256_point_double(multiples[k][0], multiples[k][1], multiples[k][2],
                             multiples[k / 2][0], multiples[k / 2][1],
                             multiples[k / 2][2]);
    } else {
      // Odd multiples are |p| + (k-1) * |p|.
      fiat_p256_point_add(multiples[k][0], multiples[k][1], multiples[k][2],
                          multiples[1][0], multiples[1][1], multiples[1][2],
                          0 /* not mixed */, multiples[k - 1][0],
                          multiples[k - 1][1], multiples[k - 1][2]);
    }
  }

  // The accumulator starts as the point at infinity.
  fiat_p256_felem acc[3] = {{0}, {0}, {0}};
  const uint8_t *scalar_bytes = scalar->bytes;

  // While set, the accumulator is still empty: doubling is pointless and the
  // first selected point can simply be copied in.
  int first_window = 1;
  for (size_t i = 256; i-- > 0;) {
    if (!first_window) {
      fiat_p256_point_double(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2]);
    }

    // Only every fifth bit position starts a window.
    if (i % 5 != 0) {
      continue;
    }

    // Bits i+4 (most significant) down to i-1. Out-of-range positions,
    // including i-1 when i is zero, read as zero.
    const crypto_word_t window =
        (fiat_p256_get_bit(scalar_bytes, i + 4) << 5) |
        (fiat_p256_get_bit(scalar_bytes, i + 3) << 4) |
        (fiat_p256_get_bit(scalar_bytes, i + 2) << 3) |
        (fiat_p256_get_bit(scalar_bytes, i + 1) << 2) |
        (fiat_p256_get_bit(scalar_bytes, i) << 1) |
        fiat_p256_get_bit(scalar_bytes, i - 1);
    crypto_word_t sign, digit;
    ec_GFp_nistp_recode_scalar_bits(&sign, &digit, window);

    // Fetch |digit| * |p| and conditionally negate it: (X, -Y, Z) is the
    // negative of (X, Y, Z).
    fiat_p256_felem selected[3], neg_y;
    fiat_p256_select_point((fiat_p256_limb_t)digit, 17,
                           (const fiat_p256_felem(*)[3])multiples, selected);
    fiat_p256_opp(neg_y, selected[1]);
    fiat_p256_cmovznz(selected[1], (fiat_p256_limb_t)sign, selected[1], neg_y);

    if (first_window) {
      fiat_p256_copy(acc[0], selected[0]);
      fiat_p256_copy(acc[1], selected[1]);
      fiat_p256_copy(acc[2], selected[2]);
      first_window = 0;
    } else {
      fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                          0 /* not mixed */, selected[0], selected[1],
                          selected[2]);
    }
  }

  fiat_p256_to_generic(&r->X, acc[0]);
  fiat_p256_to_generic(&r->Y, acc[1]);
  fiat_p256_to_generic(&r->Z, acc[2]);
}
532 
// ec_GFp_nistp256_point_mul_base sets |r| to |scalar| times the group
// generator, using the affine tables in |fiat_p256_g_pre_comp|.
//
// For each of the 32 positions i (high to low) the accumulator is doubled and
// two table points are added: one indexed by scalar bits i+32, i+96, i+160 and
// i+224 (table 1) and one indexed by bits i, i+64, i+128 and i+192 (table 0).
// Table entries are fetched with |fiat_p256_select_point_affine|, which does
// not index the table by scalar bits.
static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
                                           EC_RAW_POINT *r,
                                           const EC_SCALAR *scalar) {
  // The accumulator starts as the point at infinity.
  fiat_p256_felem acc[3] = {{0}, {0}, {0}};
  fiat_p256_felem entry[3];
  const uint8_t *scalar_bytes = scalar->bytes;

  // While set, the accumulator is still empty: the doubling is skipped and the
  // first table point is copied in instead of added.
  int first_round = 1;
  for (size_t i = 32; i-- > 0;) {
    if (!first_round) {
      fiat_p256_point_double(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2]);
    }

    // Bits offset 32 upwards from the current position select from table 1.
    const crypto_word_t upper =
        (fiat_p256_get_bit(scalar_bytes, i + 224) << 3) |
        (fiat_p256_get_bit(scalar_bytes, i + 160) << 2) |
        (fiat_p256_get_bit(scalar_bytes, i + 96) << 1) |
        fiat_p256_get_bit(scalar_bytes, i + 32);
    fiat_p256_select_point_affine((fiat_p256_limb_t)upper, 15,
                                  fiat_p256_g_pre_comp[1], entry);

    if (first_round) {
      fiat_p256_copy(acc[0], entry[0]);
      fiat_p256_copy(acc[1], entry[1]);
      fiat_p256_copy(acc[2], entry[2]);
      first_round = 0;
    } else {
      fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                          1 /* mixed */, entry[0], entry[1], entry[2]);
    }

    // Bits at the current position select from table 0.
    const crypto_word_t lower =
        (fiat_p256_get_bit(scalar_bytes, i + 192) << 3) |
        (fiat_p256_get_bit(scalar_bytes, i + 128) << 2) |
        (fiat_p256_get_bit(scalar_bytes, i + 64) << 1) |
        fiat_p256_get_bit(scalar_bytes, i);
    fiat_p256_select_point_affine((fiat_p256_limb_t)lower, 15,
                                  fiat_p256_g_pre_comp[0], entry);
    fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                        1 /* mixed */, entry[0], entry[1], entry[2]);
  }

  fiat_p256_to_generic(&r->X, acc[0]);
  fiat_p256_to_generic(&r->Y, acc[1]);
  fiat_p256_to_generic(&r->Z, acc[2]);
}
580 
// ec_GFp_nistp256_point_mul_public sets |r| to |g_scalar| * G + |p_scalar| *
// |p|, where G is the generator covered by |fiat_p256_g_pre_comp|.
//
// This routine branches on, and indexes tables by, bits of both scalars, so it
// is only suitable when the scalars are public. |p_scalar| is processed in
// wNAF form with window |P256_WSIZE_PUBLIC|; |g_scalar| uses the same two
// affine tables and bit layout as the fixed-base multiplication.
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
                                             EC_RAW_POINT *r,
                                             const EC_SCALAR *g_scalar,
                                             const EC_RAW_POINT *p,
                                             const EC_SCALAR *p_scalar) {
#define P256_WSIZE_PUBLIC 4
  // odd_multiples[k] is (2*k+1) * |p|, built by repeatedly adding 2 * |p|.
  fiat_p256_felem odd_multiples[1 << (P256_WSIZE_PUBLIC - 1)][3];
  fiat_p256_from_generic(odd_multiples[0][0], &p->X);
  fiat_p256_from_generic(odd_multiples[0][1], &p->Y);
  fiat_p256_from_generic(odd_multiples[0][2], &p->Z);
  fiat_p256_felem twice_p[3];
  fiat_p256_point_double(twice_p[0], twice_p[1], twice_p[2],
                         odd_multiples[0][0], odd_multiples[0][1],
                         odd_multiples[0][2]);
  for (size_t k = 1; k < OPENSSL_ARRAY_SIZE(odd_multiples); k++) {
    fiat_p256_point_add(odd_multiples[k][0], odd_multiples[k][1],
                        odd_multiples[k][2], odd_multiples[k - 1][0],
                        odd_multiples[k - 1][1], odd_multiples[k - 1][2],
                        0 /* not mixed */, twice_p[0], twice_p[1], twice_p[2]);
  }

  // Signed-digit coefficients for |p_scalar|, one per position 0..256.
  int8_t naf[257];
  ec_compute_wNAF(group, naf, p_scalar, 256, P256_WSIZE_PUBLIC);

  // The accumulator starts as the point at infinity. While |nothing_added|
  // is set, doublings are skipped because nothing has been accumulated yet.
  fiat_p256_felem acc[3] = {{0}, {0}, {0}};
  int nothing_added = 1;
  const uint8_t *g_bytes = g_scalar->bytes;
  for (int i = 256; i >= 0; i--) {
    if (!nothing_added) {
      fiat_p256_point_double(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2]);
    }

    // The generator tables only contribute in the low 32 positions. The table
    // is indexed directly, without the constant-time lookup.
    if (i <= 31) {
      // Bits offset 32 upwards from the current position use table 1.
      const crypto_word_t upper =
          (fiat_p256_get_bit(g_bytes, i + 224) << 3) |
          (fiat_p256_get_bit(g_bytes, i + 160) << 2) |
          (fiat_p256_get_bit(g_bytes, i + 96) << 1) |
          fiat_p256_get_bit(g_bytes, i + 32);
      if (upper != 0) {
        const size_t entry = (size_t)(upper - 1);
        fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                            1 /* mixed */, fiat_p256_g_pre_comp[1][entry][0],
                            fiat_p256_g_pre_comp[1][entry][1], fiat_p256_one);
        nothing_added = 0;
      }

      // Bits at the current position use table 0.
      const crypto_word_t lower =
          (fiat_p256_get_bit(g_bytes, i + 192) << 3) |
          (fiat_p256_get_bit(g_bytes, i + 128) << 2) |
          (fiat_p256_get_bit(g_bytes, i + 64) << 1) |
          fiat_p256_get_bit(g_bytes, i);
      if (lower != 0) {
        const size_t entry = (size_t)(lower - 1);
        fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                            1 /* mixed */, fiat_p256_g_pre_comp[0][entry][0],
                            fiat_p256_g_pre_comp[0][entry][1], fiat_p256_one);
        nothing_added = 0;
      }
    }

    const int digit = naf[i];
    if (digit == 0) {
      continue;
    }

    // Non-zero digits are odd; |digit| / 2 (rounded down) indexes the table of
    // odd multiples.
    assert(digit & 1);
    const int magnitude = digit < 0 ? -digit : digit;
    const size_t idx = (size_t)(magnitude >> 1);

    // A negative digit subtracts the multiple, i.e. adds (X, -Y, Z).
    fiat_p256_felem neg_y;
    const fiat_p256_limb_t *y_coord = odd_multiples[idx][1];
    if (digit < 0) {
      fiat_p256_opp(neg_y, odd_multiples[idx][1]);
      y_coord = neg_y;
    }

    if (nothing_added) {
      fiat_p256_copy(acc[0], odd_multiples[idx][0]);
      fiat_p256_copy(acc[1], y_coord);
      fiat_p256_copy(acc[2], odd_multiples[idx][2]);
      nothing_added = 0;
    } else {
      fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
                          0 /* not mixed */, odd_multiples[idx][0], y_coord,
                          odd_multiples[idx][2]);
    }
  }

  fiat_p256_to_generic(&r->X, acc[0]);
  fiat_p256_to_generic(&r->Y, acc[1]);
  fiat_p256_to_generic(&r->Z, acc[2]);
}
672 
ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP * group,const EC_RAW_POINT * p,const EC_SCALAR * r)673 static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,
674                                             const EC_RAW_POINT *p,
675                                             const EC_SCALAR *r) {
676   if (ec_GFp_simple_is_at_infinity(group, p)) {
677     return 0;
678   }
679 
680   // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
681   // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
682   // not.
683   fiat_p256_felem Z2_mont;
684   fiat_p256_from_generic(Z2_mont, &p->Z);
685   fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont);
686 
687   fiat_p256_felem r_Z2;
688   fiat_p256_from_bytes(r_Z2, r->bytes);  // r < order < p, so this is valid.
689   fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
690 
691   fiat_p256_felem X;
692   fiat_p256_from_generic(X, &p->X);
693   fiat_p256_from_montgomery(X, X);
694 
695   if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
696     return 1;
697   }
698 
699   // During signing the x coefficient is reduced modulo the group order.
700   // Therefore there is a small possibility, less than 1/2^128, that group_order
701   // < p.x < P. in that case we need not only to compare against |r| but also to
702   // compare against r+group_order.
703   assert(group->field.width == group->order.width);
704   if (bn_less_than_words(r->words, group->field_minus_order.words,
705                          group->field.width)) {
706     // We can ignore the carry because: r + group_order < p < 2^256.
707     EC_FELEM tmp;
708     bn_add_words(tmp.words, r->words, group->order.d, group->order.width);
709     fiat_p256_from_generic(r_Z2, &tmp);
710     fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
711     if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
712       return 1;
713     }
714   }
715 
716   return 0;
717 }
718 
DEFINE_METHOD_FUNCTION(EC_METHOD,EC_GFp_nistp256_method)719 DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
720   out->group_init = ec_GFp_mont_group_init;
721   out->group_finish = ec_GFp_mont_group_finish;
722   out->group_set_curve = ec_GFp_mont_group_set_curve;
723   out->point_get_affine_coordinates =
724       ec_GFp_nistp256_point_get_affine_coordinates;
725   out->add = ec_GFp_nistp256_add;
726   out->dbl = ec_GFp_nistp256_dbl;
727   out->mul = ec_GFp_nistp256_point_mul;
728   out->mul_base = ec_GFp_nistp256_point_mul_base;
729   out->mul_public = ec_GFp_nistp256_point_mul_public;
730   out->felem_mul = ec_GFp_mont_felem_mul;
731   out->felem_sqr = ec_GFp_mont_felem_sqr;
732   out->felem_to_bytes = ec_GFp_mont_felem_to_bytes;
733   out->felem_from_bytes = ec_GFp_mont_felem_from_bytes;
734   out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery;
735   out->scalar_to_montgomery_inv_vartime =
736       ec_simple_scalar_to_montgomery_inv_vartime;
737   out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate;
738 }
739 
740 #undef BORINGSSL_NISTP256_64BIT
741