• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (c) 2014, Intel Corporation.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
15 // Developers and authors:
16 // Shay Gueron (1, 2), and Vlad Krasnov (1)
17 // (1) Intel Corporation, Israel Development Center
18 // (2) University of Haifa
19 // Reference:
20 // S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
21 //                          256 Bit Primes"
22 
23 #include <openssl/ec.h>
24 
25 #include <assert.h>
26 #include <stdint.h>
27 #include <string.h>
28 
29 #include <openssl/bn.h>
30 #include <openssl/crypto.h>
31 #include <openssl/err.h>
32 
33 #include "../bn/internal.h"
34 #include "../delocate.h"
35 #include "../../internal.h"
36 #include "internal.h"
37 #include "p256-x86_64.h"
38 
39 
40 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
41     !defined(OPENSSL_SMALL)
42 
43 typedef P256_POINT_AFFINE PRECOMP256_ROW[64];
44 
45 // One converted into the Montgomery domain
46 static const BN_ULONG ONE[P256_LIMBS] = {
47     TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000),
48     TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe),
49 };
50 
51 // Precomputed tables for the default generator
52 #include "p256-x86_64-table.h"
53 
54 // Recode window to a signed digit, see util-64.c for details
booth_recode_w5(unsigned in)55 static unsigned booth_recode_w5(unsigned in) {
56   unsigned s, d;
57 
58   s = ~((in >> 5) - 1);
59   d = (1 << 6) - in - 1;
60   d = (d & s) | (in & ~s);
61   d = (d >> 1) + (d & 1);
62 
63   return (d << 1) + (s & 1);
64 }
65 
booth_recode_w7(unsigned in)66 static unsigned booth_recode_w7(unsigned in) {
67   unsigned s, d;
68 
69   s = ~((in >> 7) - 1);
70   d = (1 << 8) - in - 1;
71   d = (d & s) | (in & ~s);
72   d = (d >> 1) + (d & 1);
73 
74   return (d << 1) + (s & 1);
75 }
76 
77 // copy_conditional copies |src| to |dst| if |move| is one and leaves it as-is
78 // if |move| is zero.
79 //
80 // WARNING: this breaks the usual convention of constant-time functions
81 // returning masks.
copy_conditional(BN_ULONG dst[P256_LIMBS],const BN_ULONG src[P256_LIMBS],BN_ULONG move)82 static void copy_conditional(BN_ULONG dst[P256_LIMBS],
83                              const BN_ULONG src[P256_LIMBS], BN_ULONG move) {
84   BN_ULONG mask1 = ((BN_ULONG)0) - move;
85   BN_ULONG mask2 = ~mask1;
86 
87   dst[0] = (src[0] & mask1) ^ (dst[0] & mask2);
88   dst[1] = (src[1] & mask1) ^ (dst[1] & mask2);
89   dst[2] = (src[2] & mask1) ^ (dst[2] & mask2);
90   dst[3] = (src[3] & mask1) ^ (dst[3] & mask2);
91   if (P256_LIMBS == 8) {
92     dst[4] = (src[4] & mask1) ^ (dst[4] & mask2);
93     dst[5] = (src[5] & mask1) ^ (dst[5] & mask2);
94     dst[6] = (src[6] & mask1) ^ (dst[6] & mask2);
95     dst[7] = (src[7] & mask1) ^ (dst[7] & mask2);
96   }
97 }
98 
99 // is_not_zero returns one iff in != 0 and zero otherwise.
100 //
101 // WARNING: this breaks the usual convention of constant-time functions
102 // returning masks.
103 //
104 // (define-fun is_not_zero ((in (_ BitVec 64))) (_ BitVec 64)
105 //   (bvlshr (bvor in (bvsub #x0000000000000000 in)) #x000000000000003f)
106 // )
107 //
108 // (declare-fun x () (_ BitVec 64))
109 //
110 // (assert (and (= x #x0000000000000000) (= (is_not_zero x) #x0000000000000001)))
111 // (check-sat)
112 //
113 // (assert (and (not (= x #x0000000000000000)) (= (is_not_zero x) #x0000000000000000)))
114 // (check-sat)
115 //
is_not_zero(BN_ULONG in)116 static BN_ULONG is_not_zero(BN_ULONG in) {
117   in |= (0 - in);
118   in >>= BN_BITS2 - 1;
119   return in;
120 }
121 
122 // ecp_nistz256_mod_inverse_mont sets |r| to (|in| * 2^-256)^-1 * 2^256 mod p.
123 // That is, |r| is the modular inverse of |in| for input and output in the
124 // Montgomery domain.
ecp_nistz256_mod_inverse_mont(BN_ULONG r[P256_LIMBS],const BN_ULONG in[P256_LIMBS])125 static void ecp_nistz256_mod_inverse_mont(BN_ULONG r[P256_LIMBS],
126                                           const BN_ULONG in[P256_LIMBS]) {
127   /* The poly is ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff
128      ffffffff
129      We use FLT and used poly-2 as exponent */
130   BN_ULONG p2[P256_LIMBS];
131   BN_ULONG p4[P256_LIMBS];
132   BN_ULONG p8[P256_LIMBS];
133   BN_ULONG p16[P256_LIMBS];
134   BN_ULONG p32[P256_LIMBS];
135   BN_ULONG res[P256_LIMBS];
136   int i;
137 
138   ecp_nistz256_sqr_mont(res, in);
139   ecp_nistz256_mul_mont(p2, res, in);  // 3*p
140 
141   ecp_nistz256_sqr_mont(res, p2);
142   ecp_nistz256_sqr_mont(res, res);
143   ecp_nistz256_mul_mont(p4, res, p2);  // f*p
144 
145   ecp_nistz256_sqr_mont(res, p4);
146   ecp_nistz256_sqr_mont(res, res);
147   ecp_nistz256_sqr_mont(res, res);
148   ecp_nistz256_sqr_mont(res, res);
149   ecp_nistz256_mul_mont(p8, res, p4);  // ff*p
150 
151   ecp_nistz256_sqr_mont(res, p8);
152   for (i = 0; i < 7; i++) {
153     ecp_nistz256_sqr_mont(res, res);
154   }
155   ecp_nistz256_mul_mont(p16, res, p8);  // ffff*p
156 
157   ecp_nistz256_sqr_mont(res, p16);
158   for (i = 0; i < 15; i++) {
159     ecp_nistz256_sqr_mont(res, res);
160   }
161   ecp_nistz256_mul_mont(p32, res, p16);  // ffffffff*p
162 
163   ecp_nistz256_sqr_mont(res, p32);
164   for (i = 0; i < 31; i++) {
165     ecp_nistz256_sqr_mont(res, res);
166   }
167   ecp_nistz256_mul_mont(res, res, in);
168 
169   for (i = 0; i < 32 * 4; i++) {
170     ecp_nistz256_sqr_mont(res, res);
171   }
172   ecp_nistz256_mul_mont(res, res, p32);
173 
174   for (i = 0; i < 32; i++) {
175     ecp_nistz256_sqr_mont(res, res);
176   }
177   ecp_nistz256_mul_mont(res, res, p32);
178 
179   for (i = 0; i < 16; i++) {
180     ecp_nistz256_sqr_mont(res, res);
181   }
182   ecp_nistz256_mul_mont(res, res, p16);
183 
184   for (i = 0; i < 8; i++) {
185     ecp_nistz256_sqr_mont(res, res);
186   }
187   ecp_nistz256_mul_mont(res, res, p8);
188 
189   ecp_nistz256_sqr_mont(res, res);
190   ecp_nistz256_sqr_mont(res, res);
191   ecp_nistz256_sqr_mont(res, res);
192   ecp_nistz256_sqr_mont(res, res);
193   ecp_nistz256_mul_mont(res, res, p4);
194 
195   ecp_nistz256_sqr_mont(res, res);
196   ecp_nistz256_sqr_mont(res, res);
197   ecp_nistz256_mul_mont(res, res, p2);
198 
199   ecp_nistz256_sqr_mont(res, res);
200   ecp_nistz256_sqr_mont(res, res);
201   ecp_nistz256_mul_mont(r, res, in);
202 }
203 
204 // ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and
205 // returns one if it fits. Otherwise it returns zero.
ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS],const BIGNUM * in)206 static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS],
207                                              const BIGNUM *in) {
208   return bn_copy_words(out, P256_LIMBS, in);
209 }
210 
211 // r = p * p_scalar
ecp_nistz256_windowed_mul(const EC_GROUP * group,P256_POINT * r,const EC_POINT * p,const EC_SCALAR * p_scalar)212 static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
213                                      const EC_POINT *p,
214                                      const EC_SCALAR *p_scalar) {
215   assert(p != NULL);
216   assert(p_scalar != NULL);
217 
218   static const unsigned kWindowSize = 5;
219   static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
220 
221   // A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
222   // add no more than 63 bytes of overhead. Thus, |table| should require
223   // ~1599 ((96 * 16) + 63) bytes of stack space.
224   alignas(64) P256_POINT table[16];
225   uint8_t p_str[33];
226   OPENSSL_memcpy(p_str, p_scalar->bytes, 32);
227   p_str[32] = 0;
228 
229   // table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
230   // not stored. All other values are actually stored with an offset of -1 in
231   // table.
232   P256_POINT *row = table;
233 
234   if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &p->X) ||
235       !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &p->Y) ||
236       !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &p->Z)) {
237     OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
238     return 0;
239   }
240 
241   ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]);
242   ecp_nistz256_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
243   ecp_nistz256_point_double(&row[4 - 1], &row[2 - 1]);
244   ecp_nistz256_point_double(&row[6 - 1], &row[3 - 1]);
245   ecp_nistz256_point_double(&row[8 - 1], &row[4 - 1]);
246   ecp_nistz256_point_double(&row[12 - 1], &row[6 - 1]);
247   ecp_nistz256_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
248   ecp_nistz256_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
249   ecp_nistz256_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
250   ecp_nistz256_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
251   ecp_nistz256_point_double(&row[14 - 1], &row[7 - 1]);
252   ecp_nistz256_point_double(&row[10 - 1], &row[5 - 1]);
253   ecp_nistz256_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
254   ecp_nistz256_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
255   ecp_nistz256_point_double(&row[16 - 1], &row[8 - 1]);
256 
257   BN_ULONG tmp[P256_LIMBS];
258   alignas(32) P256_POINT h;
259   unsigned index = 255;
260   unsigned wvalue = p_str[(index - 1) / 8];
261   wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
262 
263   ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
264 
265   while (index >= 5) {
266     if (index != 255) {
267       unsigned off = (index - 1) / 8;
268 
269       wvalue = p_str[off] | p_str[off + 1] << 8;
270       wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
271 
272       wvalue = booth_recode_w5(wvalue);
273 
274       ecp_nistz256_select_w5(&h, table, wvalue >> 1);
275 
276       ecp_nistz256_neg(tmp, h.Y);
277       copy_conditional(h.Y, tmp, (wvalue & 1));
278 
279       ecp_nistz256_point_add(r, r, &h);
280     }
281 
282     index -= kWindowSize;
283 
284     ecp_nistz256_point_double(r, r);
285     ecp_nistz256_point_double(r, r);
286     ecp_nistz256_point_double(r, r);
287     ecp_nistz256_point_double(r, r);
288     ecp_nistz256_point_double(r, r);
289   }
290 
291   // Final window
292   wvalue = p_str[0];
293   wvalue = (wvalue << 1) & kMask;
294 
295   wvalue = booth_recode_w5(wvalue);
296 
297   ecp_nistz256_select_w5(&h, table, wvalue >> 1);
298 
299   ecp_nistz256_neg(tmp, h.Y);
300   copy_conditional(h.Y, tmp, wvalue & 1);
301 
302   ecp_nistz256_point_add(r, r, &h);
303 
304   return 1;
305 }
306 
ecp_nistz256_points_mul(const EC_GROUP * group,EC_POINT * r,const EC_SCALAR * g_scalar,const EC_POINT * p_,const EC_SCALAR * p_scalar,BN_CTX * ctx)307 static int ecp_nistz256_points_mul(const EC_GROUP *group, EC_POINT *r,
308                                    const EC_SCALAR *g_scalar,
309                                    const EC_POINT *p_,
310                                    const EC_SCALAR *p_scalar, BN_CTX *ctx) {
311   assert((p_ != NULL) == (p_scalar != NULL));
312 
313   static const unsigned kWindowSize = 7;
314   static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
315 
316   alignas(32) union {
317     P256_POINT p;
318     P256_POINT_AFFINE a;
319   } t, p;
320 
321   if (g_scalar != NULL) {
322     uint8_t p_str[33];
323     OPENSSL_memcpy(p_str, g_scalar->bytes, 32);
324     p_str[32] = 0;
325 
326     // First window
327     unsigned wvalue = (p_str[0] << 1) & kMask;
328     unsigned index = kWindowSize;
329 
330     wvalue = booth_recode_w7(wvalue);
331 
332     const PRECOMP256_ROW *const precomputed_table =
333         (const PRECOMP256_ROW *)ecp_nistz256_precomputed;
334     ecp_nistz256_select_w7(&p.a, precomputed_table[0], wvalue >> 1);
335 
336     ecp_nistz256_neg(p.p.Z, p.p.Y);
337     copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
338 
339     // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
340     // is infinity and |ONE| otherwise. |p| was computed from the table, so it
341     // is infinity iff |wvalue >> 1| is zero.
342     OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
343     copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
344 
345     for (int i = 1; i < 37; i++) {
346       unsigned off = (index - 1) / 8;
347       wvalue = p_str[off] | p_str[off + 1] << 8;
348       wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
349       index += kWindowSize;
350 
351       wvalue = booth_recode_w7(wvalue);
352 
353       ecp_nistz256_select_w7(&t.a, precomputed_table[i], wvalue >> 1);
354 
355       ecp_nistz256_neg(t.p.Z, t.a.Y);
356       copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
357 
358       ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
359     }
360   }
361 
362   const int p_is_infinity = g_scalar == NULL;
363   if (p_scalar != NULL) {
364     P256_POINT *out = &t.p;
365     if (p_is_infinity) {
366       out = &p.p;
367     }
368 
369     if (!ecp_nistz256_windowed_mul(group, out, p_, p_scalar)) {
370       return 0;
371     }
372 
373     if (!p_is_infinity) {
374       ecp_nistz256_point_add(&p.p, &p.p, out);
375     }
376   }
377 
378   // Not constant-time, but we're only operating on the public output.
379   if (!bn_set_words(&r->X, p.p.X, P256_LIMBS) ||
380       !bn_set_words(&r->Y, p.p.Y, P256_LIMBS) ||
381       !bn_set_words(&r->Z, p.p.Z, P256_LIMBS)) {
382     return 0;
383   }
384 
385   return 1;
386 }
387 
ecp_nistz256_get_affine(const EC_GROUP * group,const EC_POINT * point,BIGNUM * x,BIGNUM * y,BN_CTX * ctx)388 static int ecp_nistz256_get_affine(const EC_GROUP *group, const EC_POINT *point,
389                                    BIGNUM *x, BIGNUM *y, BN_CTX *ctx) {
390   BN_ULONG z_inv2[P256_LIMBS];
391   BN_ULONG z_inv3[P256_LIMBS];
392   BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS];
393 
394   if (EC_POINT_is_at_infinity(group, point)) {
395     OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
396     return 0;
397   }
398 
399   if (!ecp_nistz256_bignum_to_field_elem(point_x, &point->X) ||
400       !ecp_nistz256_bignum_to_field_elem(point_y, &point->Y) ||
401       !ecp_nistz256_bignum_to_field_elem(point_z, &point->Z)) {
402     OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
403     return 0;
404   }
405 
406   ecp_nistz256_mod_inverse_mont(z_inv3, point_z);
407   ecp_nistz256_sqr_mont(z_inv2, z_inv3);
408 
409   // Instead of using |ecp_nistz256_from_mont| to convert the |x| coordinate
410   // and then calling |ecp_nistz256_from_mont| again to convert the |y|
411   // coordinate below, convert the common factor |z_inv2| once now, saving one
412   // reduction.
413   ecp_nistz256_from_mont(z_inv2, z_inv2);
414 
415   if (x != NULL) {
416     BN_ULONG x_aff[P256_LIMBS];
417     ecp_nistz256_mul_mont(x_aff, z_inv2, point_x);
418     if (!bn_set_words(x, x_aff, P256_LIMBS)) {
419       OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
420       return 0;
421     }
422   }
423 
424   if (y != NULL) {
425     BN_ULONG y_aff[P256_LIMBS];
426     ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2);
427     ecp_nistz256_mul_mont(y_aff, z_inv3, point_y);
428     if (!bn_set_words(y, y_aff, P256_LIMBS)) {
429       OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
430       return 0;
431     }
432   }
433 
434   return 1;
435 }
436 
DEFINE_METHOD_FUNCTION(EC_METHOD,EC_GFp_nistz256_method)437 DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
438   out->group_init = ec_GFp_mont_group_init;
439   out->group_finish = ec_GFp_mont_group_finish;
440   out->group_set_curve = ec_GFp_mont_group_set_curve;
441   out->point_get_affine_coordinates = ecp_nistz256_get_affine;
442   out->mul = ecp_nistz256_points_mul;
443   out->mul_public = ecp_nistz256_points_mul;
444   out->field_mul = ec_GFp_mont_field_mul;
445   out->field_sqr = ec_GFp_mont_field_sqr;
446   out->field_encode = ec_GFp_mont_field_encode;
447   out->field_decode = ec_GFp_mont_field_decode;
448 };
449 
450 #endif /* !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
451           !defined(OPENSSL_SMALL) */
452