/* Copyright 2020 The BoringSSL Authors
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

// An implementation of the NIST P-256 elliptic curve point multiplication.
// 256-bit Montgomery form, for 64- and 32-bit platforms. Field operations are
// generated by Fiat, which lives in //third_party/fiat.

#include <openssl/base.h>

#include <openssl/bn.h>
#include <openssl/ec.h>
#include <openssl/err.h>
#include <openssl/mem.h>

#include <assert.h>
#include <string.h>

#include "../../internal.h"
#include "../delocate.h"
#include "./internal.h"

#if defined(BORINGSSL_HAS_UINT128)
#include "../../../third_party/fiat/p256_64.h"
#elif defined(OPENSSL_64_BIT)
#include "../../../third_party/fiat/p256_64_msvc.h"
#else
#include "../../../third_party/fiat/p256_32.h"
#endif


// utility functions, handwritten

#if defined(OPENSSL_64_BIT)
#define FIAT_P256_NLIMBS 4
typedef uint64_t fiat_p256_limb_t;
typedef uint64_t fiat_p256_felem[FIAT_P256_NLIMBS];
static const fiat_p256_felem fiat_p256_one = {0x1, 0xffffffff00000000,
                                              0xffffffffffffffff, 0xfffffffe};
#else  // 64BIT; else 32BIT
#define FIAT_P256_NLIMBS 8
typedef uint32_t fiat_p256_limb_t;
typedef uint32_t fiat_p256_felem[FIAT_P256_NLIMBS];
static const fiat_p256_felem fiat_p256_one = {
    0x1, 0x0, 0x0, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0x0};
#endif  // 64BIT


static fiat_p256_limb_t fiat_p256_nz(
    const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
  fiat_p256_limb_t ret;
  fiat_p256_nonzero(&ret, in1);
  return ret;
}

static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
                           const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
  for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) {
    out[i] = in1[i];
  }
}

static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
                              fiat_p256_limb_t t,
                              const fiat_p256_limb_t z[FIAT_P256_NLIMBS],
                              const fiat_p256_limb_t nz[FIAT_P256_NLIMBS]) {
  fiat_p256_selectznz(out, !!t, z, nz);
}

static void fiat_p256_from_words(fiat_p256_felem out,
                                 const BN_ULONG in[32 / sizeof(BN_ULONG)]) {
  // Typically, |BN_ULONG| and |fiat_p256_limb_t| will be the same type, but on
  // 64-bit platforms without |uint128_t|, they are different. However, on
  // little-endian systems, |uint64_t[4]| and |uint32_t[8]| have the same
  // layout.
  OPENSSL_memcpy(out, in, 32);
}

static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) {
  fiat_p256_from_words(out, in->words);
}

static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) {
  // See |fiat_p256_from_words|.
  OPENSSL_memcpy(out->words, in, 32);
}
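
// Field elements in this file are kept in the Montgomery domain: a value |a|
// is stored as a*R mod p with R = 2^256, so the limbs of |fiat_p256_one| above
// encode R mod p rather than the integer 1. A minimal round-trip sketch,
// assuming the Fiat-generated |fiat_p256_to_montgomery| and
// |fiat_p256_from_montgomery| conversions (64-bit limb layout shown):
//
//   fiat_p256_felem one_mont, one_raw = {1, 0, 0, 0};
//   fiat_p256_to_montgomery(one_mont, one_raw);    // equals |fiat_p256_one|
//   fiat_p256_from_montgomery(one_raw, one_mont);  // back to {1, 0, 0, 0}
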
// fiat_p256_inv_square calculates |out| = |in|^{-2}
//
// Based on Fermat's Little Theorem:
//   a^p = a (mod p)
//   a^{p-1} = 1 (mod p)
//   a^{p-3} = a^{-2} (mod p)
static void fiat_p256_inv_square(fiat_p256_felem out,
                                 const fiat_p256_felem in) {
  // This implements the addition chain described in
  // https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
  fiat_p256_felem x2, x3, x6, x12, x15, x30, x32;
  fiat_p256_square(x2, in);   // 2^2 - 2^1
  fiat_p256_mul(x2, x2, in);  // 2^2 - 2^0

  fiat_p256_square(x3, x2);   // 2^3 - 2^1
  fiat_p256_mul(x3, x3, in);  // 2^3 - 2^0

  fiat_p256_square(x6, x3);
  for (int i = 1; i < 3; i++) {
    fiat_p256_square(x6, x6);
  }                           // 2^6 - 2^3
  fiat_p256_mul(x6, x6, x3);  // 2^6 - 2^0

  fiat_p256_square(x12, x6);
  for (int i = 1; i < 6; i++) {
    fiat_p256_square(x12, x12);
  }                             // 2^12 - 2^6
  fiat_p256_mul(x12, x12, x6);  // 2^12 - 2^0

  fiat_p256_square(x15, x12);
  for (int i = 1; i < 3; i++) {
    fiat_p256_square(x15, x15);
  }                             // 2^15 - 2^3
  fiat_p256_mul(x15, x15, x3);  // 2^15 - 2^0

  fiat_p256_square(x30, x15);
  for (int i = 1; i < 15; i++) {
    fiat_p256_square(x30, x30);
  }                              // 2^30 - 2^15
  fiat_p256_mul(x30, x30, x15);  // 2^30 - 2^0

  fiat_p256_square(x32, x30);
  fiat_p256_square(x32, x32);   // 2^32 - 2^2
  fiat_p256_mul(x32, x32, x2);  // 2^32 - 2^0

  fiat_p256_felem ret;
  fiat_p256_square(ret, x32);
  for (int i = 1; i < 31 + 1; i++) {
    fiat_p256_square(ret, ret);
  }                             // 2^64 - 2^32
  fiat_p256_mul(ret, ret, in);  // 2^64 - 2^32 + 2^0

  for (int i = 0; i < 96 + 32; i++) {
    fiat_p256_square(ret, ret);
  }                              // 2^192 - 2^160 + 2^128
  fiat_p256_mul(ret, ret, x32);  // 2^192 - 2^160 + 2^128 + 2^32 - 2^0

  for (int i = 0; i < 32; i++) {
    fiat_p256_square(ret, ret);
  }                              // 2^224 - 2^192 + 2^160 + 2^64 - 2^32
  fiat_p256_mul(ret, ret, x32);  // 2^224 - 2^192 + 2^160 + 2^64 - 2^0

  for (int i = 0; i < 30; i++) {
    fiat_p256_square(ret, ret);
  }                              // 2^254 - 2^222 + 2^190 + 2^94 - 2^30
  fiat_p256_mul(ret, ret, x30);  // 2^254 - 2^222 + 2^190 + 2^94 - 2^0

  fiat_p256_square(ret, ret);
  fiat_p256_square(out, ret);  // 2^256 - 2^224 + 2^192 + 2^96 - 2^2, i.e. p - 3
}

// Group operations
// ----------------
//
// Building on top of the field operations we have the operations on the
// elliptic curve group itself. Points on the curve are represented in Jacobian
// coordinates.
//
// Both operations were transcribed to Coq and proven to correspond to naive
// implementations using Affine coordinates, for all suitable fields. In the
// Coq proofs, issues of constant-time execution and memory layout (aliasing)
// conventions were not considered. Specification of affine coordinates:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Spec/WeierstrassCurve.v#L28>
// As a sanity check, a proof that these points form a commutative group:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/AffineProofs.v#L33>
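
// A Jacobian triple (X, Y, Z) with Z != 0 represents the affine point
// (X/Z^2, Y/Z^3); Z == 0 encodes the point at infinity. As a sketch, affine
// coordinates can be recovered with the helpers above, mirroring
// |ec_GFp_nistp256_point_get_affine_coordinates| below:
//
//   fiat_p256_felem zinv2, x_affine, y_affine;
//   fiat_p256_inv_square(zinv2, Z);            // Z^-2
//   fiat_p256_mul(x_affine, X, zinv2);         // X / Z^2
//   fiat_p256_mul(y_affine, Y, Z);             // Y * Z
//   fiat_p256_square(zinv2, zinv2);            // Z^-4
//   fiat_p256_mul(y_affine, y_affine, zinv2);  // Y * Z / Z^4 = Y / Z^3
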
// fiat_p256_point_double calculates 2*(x_in, y_in, z_in)
//
// The method is taken from:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
//
// Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
// while x_out == y_in is not (maybe this works, but it's not tested).
static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out,
                                   fiat_p256_felem z_out,
                                   const fiat_p256_felem x_in,
                                   const fiat_p256_felem y_in,
                                   const fiat_p256_felem z_in) {
  fiat_p256_felem delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
  // delta = z^2
  fiat_p256_square(delta, z_in);
  // gamma = y^2
  fiat_p256_square(gamma, y_in);
  // beta = x*gamma
  fiat_p256_mul(beta, x_in, gamma);

  // alpha = 3*(x-delta)*(x+delta)
  fiat_p256_sub(ftmp, x_in, delta);
  fiat_p256_add(ftmp2, x_in, delta);

  fiat_p256_add(tmptmp, ftmp2, ftmp2);
  fiat_p256_add(ftmp2, ftmp2, tmptmp);
  fiat_p256_mul(alpha, ftmp, ftmp2);

  // x' = alpha^2 - 8*beta
  fiat_p256_square(x_out, alpha);
  fiat_p256_add(fourbeta, beta, beta);
  fiat_p256_add(fourbeta, fourbeta, fourbeta);
  fiat_p256_add(tmptmp, fourbeta, fourbeta);
  fiat_p256_sub(x_out, x_out, tmptmp);

  // z' = (y + z)^2 - gamma - delta
  fiat_p256_add(delta, gamma, delta);
  fiat_p256_add(ftmp, y_in, z_in);
  fiat_p256_square(z_out, ftmp);
  fiat_p256_sub(z_out, z_out, delta);

  // y' = alpha*(4*beta - x') - 8*gamma^2
  fiat_p256_sub(y_out, fourbeta, x_out);
  fiat_p256_add(gamma, gamma, gamma);
  fiat_p256_square(gamma, gamma);
  fiat_p256_mul(y_out, alpha, y_out);
  fiat_p256_add(gamma, gamma, gamma);
  fiat_p256_sub(y_out, y_out, gamma);
}
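
// The aliasing convention above permits in-place doubling, which is how
// |ec_GFp_nistp256_dbl| and the multiplication loops below use it:
//
//   fiat_p256_point_double(x, y, z, x, y, z);  // (x, y, z) <- 2*(x, y, z)
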
// fiat_p256_point_add calculates (x1, y1, z1) + (x2, y2, z2)
//
// The method is taken from:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L135>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L205>
//
// This function includes a branch for checking whether the two input points
// are equal (while not equal to the point at infinity). This case never
// happens during single point multiplication, so there is no timing leak for
// ECDH or ECDSA signing.
static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3,
                                fiat_p256_felem z3, const fiat_p256_felem x1,
                                const fiat_p256_felem y1,
                                const fiat_p256_felem z1, const int mixed,
                                const fiat_p256_felem x2,
                                const fiat_p256_felem y2,
                                const fiat_p256_felem z2) {
  fiat_p256_felem x_out, y_out, z_out;
  fiat_p256_limb_t z1nz = fiat_p256_nz(z1);
  fiat_p256_limb_t z2nz = fiat_p256_nz(z2);

  // z1z1 = z1**2
  fiat_p256_felem z1z1;
  fiat_p256_square(z1z1, z1);

  fiat_p256_felem u1, s1, two_z1z2;
  if (!mixed) {
    // z2z2 = z2**2
    fiat_p256_felem z2z2;
    fiat_p256_square(z2z2, z2);

    // u1 = x1*z2z2
    fiat_p256_mul(u1, x1, z2z2);

    // two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
    fiat_p256_add(two_z1z2, z1, z2);
    fiat_p256_square(two_z1z2, two_z1z2);
    fiat_p256_sub(two_z1z2, two_z1z2, z1z1);
    fiat_p256_sub(two_z1z2, two_z1z2, z2z2);

    // s1 = y1 * z2**3
    fiat_p256_mul(s1, z2, z2z2);
    fiat_p256_mul(s1, s1, y1);
  } else {
    // We'll assume z2 = 1 (special case z2 = 0 is handled later).

    // u1 = x1*z2z2
    fiat_p256_copy(u1, x1);
    // two_z1z2 = 2z1z2
    fiat_p256_add(two_z1z2, z1, z1);
    // s1 = y1 * z2**3
    fiat_p256_copy(s1, y1);
  }

  // u2 = x2*z1z1
  fiat_p256_felem u2;
  fiat_p256_mul(u2, x2, z1z1);

  // h = u2 - u1
  fiat_p256_felem h;
  fiat_p256_sub(h, u2, u1);

  fiat_p256_limb_t xneq = fiat_p256_nz(h);

  // z_out = two_z1z2 * h
  fiat_p256_mul(z_out, h, two_z1z2);

  // z1z1z1 = z1 * z1z1
  fiat_p256_felem z1z1z1;
  fiat_p256_mul(z1z1z1, z1, z1z1);

  // s2 = y2 * z1**3
  fiat_p256_felem s2;
  fiat_p256_mul(s2, y2, z1z1z1);

  // r = (s2 - s1)*2
  fiat_p256_felem r;
  fiat_p256_sub(r, s2, s1);
  fiat_p256_add(r, r, r);

  fiat_p256_limb_t yneq = fiat_p256_nz(r);

  fiat_p256_limb_t is_nontrivial_double = constant_time_is_zero_w(xneq | yneq) &
                                          ~constant_time_is_zero_w(z1nz) &
                                          ~constant_time_is_zero_w(z2nz);
  if (constant_time_declassify_w(is_nontrivial_double)) {
    fiat_p256_point_double(x3, y3, z3, x1, y1, z1);
    return;
  }

  // I = (2h)**2
  fiat_p256_felem i;
  fiat_p256_add(i, h, h);
  fiat_p256_square(i, i);

  // J = h * I
  fiat_p256_felem j;
  fiat_p256_mul(j, h, i);

  // V = U1 * I
  fiat_p256_felem v;
  fiat_p256_mul(v, u1, i);

  // x_out = r**2 - J - 2V
  fiat_p256_square(x_out, r);
  fiat_p256_sub(x_out, x_out, j);
  fiat_p256_sub(x_out, x_out, v);
  fiat_p256_sub(x_out, x_out, v);

  // y_out = r(V-x_out) - 2 * s1 * J
  fiat_p256_sub(y_out, v, x_out);
  fiat_p256_mul(y_out, y_out, r);
  fiat_p256_felem s1j;
  fiat_p256_mul(s1j, s1, j);
  fiat_p256_sub(y_out, y_out, s1j);
  fiat_p256_sub(y_out, y_out, s1j);

  fiat_p256_cmovznz(x_out, z1nz, x2, x_out);
  fiat_p256_cmovznz(x3, z2nz, x1, x_out);
  fiat_p256_cmovznz(y_out, z1nz, y2, y_out);
  fiat_p256_cmovznz(y3, z2nz, y1, y_out);
  fiat_p256_cmovznz(z_out, z1nz, z2, z_out);
  fiat_p256_cmovznz(z3, z2nz, z1, z_out);
}

#include "./p256_table.h"
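
// p256_table.h provides |fiat_p256_g_pre_comp|, a static table of affine
// multiples of the generator consumed by the |mul_base| and |mul_public| paths
// below. A sketch of folding one affine entry (|table_x|, |table_y|) into a
// Jacobian accumulator |acc| via the mixed path, with |fiat_p256_one| standing
// in for Z = 1:
//
//   fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
//                       1 /* mixed */, table_x, table_y, fiat_p256_one);
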
// fiat_p256_select_point_affine selects the |idx-1|th point from a
// precomputation table and copies it to out. If |idx| is zero, the output is
// the point at infinity.
static void fiat_p256_select_point_affine(
    const fiat_p256_limb_t idx, size_t size,
    const fiat_p256_felem pre_comp[/*size*/][2], fiat_p256_felem out[3]) {
  OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
  for (size_t i = 0; i < size; i++) {
    fiat_p256_limb_t mismatch = i ^ (idx - 1);
    fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
    fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
  }
  fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one);
}

// fiat_p256_select_point selects the |idx|th point from a precomputation table
// and copies it to out.
static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size,
                                   const fiat_p256_felem pre_comp[/*size*/][3],
                                   fiat_p256_felem out[3]) {
  OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3);
  for (size_t i = 0; i < size; i++) {
    fiat_p256_limb_t mismatch = i ^ idx;
    fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]);
    fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]);
    fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]);
  }
}
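
// Both selection routines above touch every table entry and combine them with
// |fiat_p256_cmovznz|, so the memory access pattern is independent of the
// secret index. For contrast, with |p_pre_comp|, |digit| and |tmp| as in
// |ec_GFp_nistp256_point_mul| below (casts omitted):
//
//   fiat_p256_copy(tmp[0], p_pre_comp[digit][0]);  // leaks |digit| through the
//                                                  // address that is read
//   fiat_p256_select_point(digit, 17, p_pre_comp, tmp);  // constant-time scan
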
// fiat_p256_get_bit returns the |i|th bit in |in|.
static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) {
  if (i < 0 || i >= 256) {
    return 0;
  }
#if defined(OPENSSL_64_BIT)
  static_assert(sizeof(BN_ULONG) == 8, "BN_ULONG was not 64-bit");
  return (in->words[i >> 6] >> (i & 63)) & 1;
#else
  static_assert(sizeof(BN_ULONG) == 4, "BN_ULONG was not 32-bit");
  return (in->words[i >> 5] >> (i & 31)) & 1;
#endif
}

// OPENSSL EC_METHOD FUNCTIONS

// Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
// (X/Z^2, Y/Z^3).
static int ec_GFp_nistp256_point_get_affine_coordinates(
    const EC_GROUP *group, const EC_JACOBIAN *point, EC_FELEM *x_out,
    EC_FELEM *y_out) {
  if (constant_time_declassify_int(
          ec_GFp_simple_is_at_infinity(group, point))) {
    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
    return 0;
  }

  fiat_p256_felem z1, z2;
  fiat_p256_from_generic(z1, &point->Z);
  fiat_p256_inv_square(z2, z1);

  if (x_out != NULL) {
    fiat_p256_felem x;
    fiat_p256_from_generic(x, &point->X);
    fiat_p256_mul(x, x, z2);
    fiat_p256_to_generic(x_out, x);
  }

  if (y_out != NULL) {
    fiat_p256_felem y;
    fiat_p256_from_generic(y, &point->Y);
    fiat_p256_square(z2, z2);  // z^-4
    fiat_p256_mul(y, y, z1);   // y * z
    fiat_p256_mul(y, y, z2);   // y * z^-3
    fiat_p256_to_generic(y_out, y);
  }

  return 1;
}

static void ec_GFp_nistp256_add(const EC_GROUP *group, EC_JACOBIAN *r,
                                const EC_JACOBIAN *a, const EC_JACOBIAN *b) {
  fiat_p256_felem x1, y1, z1, x2, y2, z2;
  fiat_p256_from_generic(x1, &a->X);
  fiat_p256_from_generic(y1, &a->Y);
  fiat_p256_from_generic(z1, &a->Z);
  fiat_p256_from_generic(x2, &b->X);
  fiat_p256_from_generic(y2, &b->Y);
  fiat_p256_from_generic(z2, &b->Z);
  fiat_p256_point_add(x1, y1, z1, x1, y1, z1, 0 /* both Jacobian */, x2, y2,
                      z2);
  fiat_p256_to_generic(&r->X, x1);
  fiat_p256_to_generic(&r->Y, y1);
  fiat_p256_to_generic(&r->Z, z1);
}

static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_JACOBIAN *r,
                                const EC_JACOBIAN *a) {
  fiat_p256_felem x, y, z;
  fiat_p256_from_generic(x, &a->X);
  fiat_p256_from_generic(y, &a->Y);
  fiat_p256_from_generic(z, &a->Z);
  fiat_p256_point_double(x, y, z, x, y, z);
  fiat_p256_to_generic(&r->X, x);
  fiat_p256_to_generic(&r->Y, y);
  fiat_p256_to_generic(&r->Z, z);
}
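
// The two scalar-multiplication routines below are constant-time refinements
// of the textbook double-and-add loop. For intuition only, a variable-time
// sketch using the helpers above, where |px|, |py|, |pz| stand for the
// Jacobian coordinates of the input point (do not use: the branch leaks the
// scalar):
//
//   fiat_p256_felem acc[3] = {{0}, {0}, {0}};  // point at infinity
//   for (int i = 255; i >= 0; i--) {
//     fiat_p256_point_double(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2]);
//     if (fiat_p256_get_bit(scalar, i)) {
//       fiat_p256_point_add(acc[0], acc[1], acc[2], acc[0], acc[1], acc[2],
//                           0 /* not mixed */, px, py, pz);
//     }
//   }
//
// |ec_GFp_nistp256_point_mul| instead consumes the scalar in signed 5-bit
// windows against a 17-entry table of multiples of the point (its main loop
// counts |i| down as a size_t and terminates via unsigned wraparound), and
// |ec_GFp_nistp256_point_mul_base| uses a comb over the static generator
// table, so the sequence of field operations does not depend on the scalar.
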
static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
                                      const EC_JACOBIAN *p,
                                      const EC_SCALAR *scalar) {
  fiat_p256_felem p_pre_comp[17][3];
  OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
  // Precompute multiples.
  fiat_p256_from_generic(p_pre_comp[1][0], &p->X);
  fiat_p256_from_generic(p_pre_comp[1][1], &p->Y);
  fiat_p256_from_generic(p_pre_comp[1][2], &p->Z);
  for (size_t j = 2; j <= 16; ++j) {
    if (j & 1) {
      fiat_p256_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
                          p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2],
                          0, p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
                          p_pre_comp[j - 1][2]);
    } else {
      fiat_p256_point_double(p_pre_comp[j][0], p_pre_comp[j][1],
                             p_pre_comp[j][2], p_pre_comp[j / 2][0],
                             p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]);
    }
  }

  // Set nq to the point at infinity.
  fiat_p256_felem nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3];

  // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round.
  int skip = 1;  // Save two point operations in the first round.
  for (size_t i = 255; i < 256; i--) {
    // double
    if (!skip) {
      fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
    }

    // do other additions every 5 doublings
    if (i % 5 == 0) {
      crypto_word_t bits = fiat_p256_get_bit(scalar, i + 4) << 5;
      bits |= fiat_p256_get_bit(scalar, i + 3) << 4;
      bits |= fiat_p256_get_bit(scalar, i + 2) << 3;
      bits |= fiat_p256_get_bit(scalar, i + 1) << 2;
      bits |= fiat_p256_get_bit(scalar, i) << 1;
      bits |= fiat_p256_get_bit(scalar, i - 1);
      crypto_word_t sign, digit;
      ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

      // select the point to add or subtract, in constant time.
      fiat_p256_select_point((fiat_p256_limb_t)digit, 17,
                             (const fiat_p256_felem(*)[3])p_pre_comp, tmp);
      fiat_p256_opp(ftmp, tmp[1]);  // (X, -Y, Z) is the negative point.
      fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp);

      if (!skip) {
        fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
                            0 /* mixed */, tmp[0], tmp[1], tmp[2]);
      } else {
        fiat_p256_copy(nq[0], tmp[0]);
        fiat_p256_copy(nq[1], tmp[1]);
        fiat_p256_copy(nq[2], tmp[2]);
        skip = 0;
      }
    }
  }

  fiat_p256_to_generic(&r->X, nq[0]);
  fiat_p256_to_generic(&r->Y, nq[1]);
  fiat_p256_to_generic(&r->Z, nq[2]);
}

static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
                                           EC_JACOBIAN *r,
                                           const EC_SCALAR *scalar) {
  // Set nq to the point at infinity.
  fiat_p256_felem nq[3] = {{0}, {0}, {0}}, tmp[3];

  int skip = 1;  // Save two point operations in the first round.
  for (size_t i = 31; i < 32; i--) {
    if (!skip) {
      fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
    }

    // First, look 32 bits upwards.
    crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3;
    bits |= fiat_p256_get_bit(scalar, i + 160) << 2;
    bits |= fiat_p256_get_bit(scalar, i + 96) << 1;
    bits |= fiat_p256_get_bit(scalar, i + 32);
    // Select the point to add, in constant time.
    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
                                  fiat_p256_g_pre_comp[1], tmp);

    if (!skip) {
      fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
                          1 /* mixed */, tmp[0], tmp[1], tmp[2]);
    } else {
      fiat_p256_copy(nq[0], tmp[0]);
      fiat_p256_copy(nq[1], tmp[1]);
      fiat_p256_copy(nq[2], tmp[2]);
      skip = 0;
    }

    // Second, look at the current position.
    bits = fiat_p256_get_bit(scalar, i + 192) << 3;
    bits |= fiat_p256_get_bit(scalar, i + 128) << 2;
    bits |= fiat_p256_get_bit(scalar, i + 64) << 1;
    bits |= fiat_p256_get_bit(scalar, i);
    // Select the point to add, in constant time.
    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
                                  fiat_p256_g_pre_comp[0], tmp);
    fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
                        tmp[0], tmp[1], tmp[2]);
  }

  fiat_p256_to_generic(&r->X, nq[0]);
  fiat_p256_to_generic(&r->Y, nq[1]);
  fiat_p256_to_generic(&r->Z, nq[2]);
}
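
// Reading off the selection logic in |ec_GFp_nistp256_point_mul_base| above:
// with b0 = bit(i), b1 = bit(i + 64), b2 = bit(i + 128), b3 = bit(i + 192) and
// idx = b0 + 2*b1 + 4*b2 + 8*b3, entry idx - 1 of |fiat_p256_g_pre_comp[0]|
// must hold the affine form of
//
//   (b0 + b1*2^64 + b2*2^128 + b3*2^192) * G,
//
// while |fiat_p256_g_pre_comp[1]| holds the same combinations scaled by 2^32,
// matching the bit positions i + 32, i + 96, i + 160 and i + 224. The loop's
// doublings supply the remaining factor of 2^i, so one base-point
// multiplication costs 31 doublings and 63 (possibly trivial) additions.
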
static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
                                             EC_JACOBIAN *r,
                                             const EC_SCALAR *g_scalar,
                                             const EC_JACOBIAN *p,
                                             const EC_SCALAR *p_scalar) {
#define P256_WSIZE_PUBLIC 4
  // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|.
  fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3];
  fiat_p256_from_generic(p_pre_comp[0][0], &p->X);
  fiat_p256_from_generic(p_pre_comp[0][1], &p->Y);
  fiat_p256_from_generic(p_pre_comp[0][2], &p->Z);
  fiat_p256_felem p2[3];
  fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0],
                         p_pre_comp[0][1], p_pre_comp[0][2]);
  for (size_t i = 1; i < OPENSSL_ARRAY_SIZE(p_pre_comp); i++) {
    fiat_p256_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
                        p_pre_comp[i - 1][0], p_pre_comp[i - 1][1],
                        p_pre_comp[i - 1][2], 0 /* not mixed */, p2[0], p2[1],
                        p2[2]);
  }

  // Set up the coefficients for |p_scalar|.
  int8_t p_wNAF[257];
  ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);

  // Set |ret| to the point at infinity.
  int skip = 1;  // Save some point operations.
  fiat_p256_felem ret[3] = {{0}, {0}, {0}};
  for (int i = 256; i >= 0; i--) {
    if (!skip) {
      fiat_p256_point_double(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2]);
    }

    // For the |g_scalar|, we use the precomputed table without the
    // constant-time lookup.
    if (i <= 31) {
      // First, look 32 bits upwards.
      crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3;
      bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2;
      bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1;
      bits |= fiat_p256_get_bit(g_scalar, i + 32);
      if (bits != 0) {
        size_t index = (size_t)(bits - 1);
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
                            1 /* mixed */, fiat_p256_g_pre_comp[1][index][0],
                            fiat_p256_g_pre_comp[1][index][1],
                            fiat_p256_one);
        skip = 0;
      }

      // Second, look at the current position.
      bits = fiat_p256_get_bit(g_scalar, i + 192) << 3;
      bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2;
      bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1;
      bits |= fiat_p256_get_bit(g_scalar, i);
      if (bits != 0) {
        size_t index = (size_t)(bits - 1);
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
                            1 /* mixed */, fiat_p256_g_pre_comp[0][index][0],
                            fiat_p256_g_pre_comp[0][index][1],
                            fiat_p256_one);
        skip = 0;
      }
    }

    int digit = p_wNAF[i];
    if (digit != 0) {
      assert(digit & 1);
      size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1);
      fiat_p256_felem *y = &p_pre_comp[idx][1], tmp;
      if (digit < 0) {
        fiat_p256_opp(tmp, p_pre_comp[idx][1]);
        y = &tmp;
      }
      if (!skip) {
        fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
                            0 /* not mixed */, p_pre_comp[idx][0], *y,
                            p_pre_comp[idx][2]);
      } else {
        fiat_p256_copy(ret[0], p_pre_comp[idx][0]);
        fiat_p256_copy(ret[1], *y);
        fiat_p256_copy(ret[2], p_pre_comp[idx][2]);
        skip = 0;
      }
    }
  }

  fiat_p256_to_generic(&r->X, ret[0]);
  fiat_p256_to_generic(&r->Y, ret[1]);
  fiat_p256_to_generic(&r->Z, ret[2]);
}

static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,
                                            const EC_JACOBIAN *p,
                                            const EC_SCALAR *r) {
  if (ec_GFp_simple_is_at_infinity(group, p)) {
    return 0;
  }

  // We wish to compare X/Z^2 with r. This is equivalent to comparing X with
  // r*Z^2. Note that X and Z are represented in Montgomery form, while r is
  // not.
  fiat_p256_felem Z2_mont;
  fiat_p256_from_generic(Z2_mont, &p->Z);
  fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont);

  fiat_p256_felem r_Z2;
  fiat_p256_from_words(r_Z2, r->words);  // r < order < p, so this is valid.
  fiat_p256_mul(r_Z2, r_Z2, Z2_mont);

  fiat_p256_felem X;
  fiat_p256_from_generic(X, &p->X);
  fiat_p256_from_montgomery(X, X);

  if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
    return 1;
  }

  // During signing the x coefficient is reduced modulo the group order.
  // Therefore there is a small possibility, less than 1/2^128, that group_order
  // < p.x < P. In that case, we need not only to compare against |r| but also
  // to compare against r+group_order.
  assert(group->field.N.width == group->order.N.width);
  EC_FELEM tmp;
  BN_ULONG carry =
      bn_add_words(tmp.words, r->words, group->order.N.d, group->field.N.width);
  if (carry == 0 &&
      bn_less_than_words(tmp.words, group->field.N.d, group->field.N.width)) {
    fiat_p256_from_generic(r_Z2, &tmp);
    fiat_p256_mul(r_Z2, r_Z2, Z2_mont);
    if (OPENSSL_memcmp(&r_Z2, &X, sizeof(r_Z2)) == 0) {
      return 1;
    }
  }

  return 0;
}
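
// The second comparison above handles the wrap-around case: writing n for the
// group order and p for the field prime, 0 <= x < p < 2n, so x mod n == r
// implies x == r or x == r + n. The latter is only relevant when r + n still
// lies below p, which is exactly what the carry and |bn_less_than_words|
// checks establish before the second Montgomery multiplication.
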
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistp256_method) {
  out->point_get_affine_coordinates =
      ec_GFp_nistp256_point_get_affine_coordinates;
  out->add = ec_GFp_nistp256_add;
  out->dbl = ec_GFp_nistp256_dbl;
  out->mul = ec_GFp_nistp256_point_mul;
  out->mul_base = ec_GFp_nistp256_point_mul_base;
  out->mul_public = ec_GFp_nistp256_point_mul_public;
  out->felem_mul = ec_GFp_mont_felem_mul;
  out->felem_sqr = ec_GFp_mont_felem_sqr;
  out->felem_to_bytes = ec_GFp_mont_felem_to_bytes;
  out->felem_from_bytes = ec_GFp_mont_felem_from_bytes;
  out->felem_reduce = ec_GFp_mont_felem_reduce;
  // TODO(davidben): This should use the specialized field arithmetic
  // implementation, rather than the generic one.
  out->felem_exp = ec_GFp_mont_felem_exp;
  out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery;
  out->scalar_to_montgomery_inv_vartime =
      ec_simple_scalar_to_montgomery_inv_vartime;
  out->cmp_x_coordinate = ec_GFp_nistp256_cmp_x_coordinate;
}
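
// Callers do not invoke these functions directly; the EC layer dispatches
// through the |EC_METHOD| table filled in above, roughly as below (illustrative
// only; see ec.c in this directory for the real call sites):
//
//   group->meth->mul_base(group, &result, scalar);     // scalar * G
//   group->meth->mul(group, &result, &point, scalar);  // scalar * point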