• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file is part of the openHiTLS project.
3  *
4  * openHiTLS is licensed under the Mulan PSL v2.
5  * You can use this software according to the terms and conditions of the Mulan PSL v2.
6  * You may obtain a copy of Mulan PSL v2 at:
7  *
8  *     http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  * See the Mulan PSL v2 for more details.
14  */
15 
16 
17 #include "hitls_build.h"
18 #ifdef HITLS_CRYPTO_X25519
19 
20 #include "x25519_asm.h"
21 #include "securec.h"
22 #include "curve25519_local.h"
23 #ifdef HITLS_CRYPTO_X25519_X8664
24 #include "crypt_utils.h"
25 #endif
/* X25519 alternative implementation: faster, but requires the assembly routines. */
#define CURVE25519_51BITS 51                    /* number of bits kept in one limb */
#define CURVE25519_51BITS_MASK 0x7ffffffffffff  /* the low 51 bits set, i.e. 2^51 - 1 */
29 
/*
 * Unpack a 32-byte field element into five 51-bit limbs.
 *
 * out: receives the limbs in out->data[0..4], least significant limb first.
 * in:  32 input bytes; the top bit of the last chunk is dropped by the mask below.
 *
 * The byte chunks are read with the CURVE25519_BYTES6/7_LOAD helpers
 * (presumably little-endian loads into a uint64_t -- confirm in curve25519_local.h).
 * Each chunk is pre-shifted so that, once the bits overflowing the previous limb
 * are OR-ed into its low end, every limb holds exactly 51 consecutive bits.
 */
static void Fp51DataToPoly(Fp51 *out, const uint8_t in[32])
{
    uint64_t limb[5];
    uint32_t i;

    /* bytes 0..6: 56 bits, the upper 5 spill into limb 1 */
    CURVE25519_BYTES7_LOAD(limb, in);

    /* bytes 7..12: 48 bits, leave room for the 5 spilled bits */
    CURVE25519_BYTES6_LOAD(limb + 1, in + 7);
    limb[1] <<= 5;

    /* bytes 13..19: 56 bits, leave room for 2 spilled bits */
    CURVE25519_BYTES7_LOAD(limb + 2, in + 13);
    limb[2] <<= 2;

    /* bytes 20..25: 48 bits, leave room for 7 spilled bits */
    CURVE25519_BYTES6_LOAD(limb + 3, in + 20);
    limb[3] <<= 7;

    /* bytes 26..31: keep only the low 47 bits, then leave room for 4 spilled bits */
    CURVE25519_BYTES6_LOAD(limb + 4, in + 26);
    limb[4] &= 0x7fffffffffff;
    limb[4] <<= 4;

    /* Move everything above bit 50 of each limb into the low end of the next one. */
    for (i = 0; i < 4; i++) {
        limb[i + 1] |= limb[i] >> CURVE25519_51BITS;
        limb[i] &= CURVE25519_51BITS_MASK;
    }

    for (i = 0; i < 5; i++) {
        out->data[i] = limb[i];
    }
}
67 
/*
 * Pack five 51-bit limbs into 32 bytes, least significant byte first.
 *
 * out: receives the 32 packed bytes.
 * h:   five limbs, least significant first; read only.
 *
 * Output byte i holds bits 8*i .. 8*i+7 of the value h[0] + h[1]*2^51 + ... + h[4]*2^204.
 * A byte that straddles two limbs takes its low part from the top of the current limb
 * and its high part from the bottom of the next one.
 */
static void Fp51UnloadTo8Bits(uint8_t out[32], uint64_t h[5])
{
    enum { LIMB_BITS = 51, LIMB_COUNT = 5, BYTE_BITS = 8, OUT_BYTES = 32 };
    uint32_t limb = 0;   /* limb that supplies the low bits of the current byte */
    uint32_t shift = 0;  /* bit offset of the current byte inside that limb */

    for (uint32_t i = 0; i < OUT_BYTES; i++) {
        uint64_t bits = h[limb] >> shift;

        /* The byte runs past bit 50 of this limb: fill its top from the next limb. */
        if ((shift + BYTE_BITS > LIMB_BITS) && (limb + 1 < LIMB_COUNT)) {
            bits |= (uint8_t)(h[limb + 1] << (LIMB_BITS - shift));
        }
        out[i] = (uint8_t)bits;

        shift += BYTE_BITS;
        if (shift >= LIMB_BITS) {
            shift -= LIMB_BITS;
            limb++;
        }
    }
}
108 
Fp51PolyToData(const Fp51 * in,uint8_t out[32])109 static void Fp51PolyToData(const Fp51 *in, uint8_t out[32])
110 {
111     uint64_t h[5];
112     h[0] = in->data[0]; // 0
113     h[1] = in->data[1]; // 1
114     h[2] = in->data[2]; // 2
115     h[3] = in->data[3]; // 3
116     h[4] = in->data[4]; // 4
117     uint64_t carry;
118 
119     carry = (h[0] + 19) >> CURVE25519_51BITS; // plus 19 then calculate carry
120     carry = (h[1] + carry) >> CURVE25519_51BITS; // carry of h[1]
121     carry = (h[2] + carry) >> CURVE25519_51BITS; // carry of h[2]
122     carry = (h[3] + carry) >> CURVE25519_51BITS; // carry of h[3]
123     carry = (h[4] + carry) >> CURVE25519_51BITS; // carry of h[4]
124 
125     h[0] += 19 * carry; // process carry h[4] -> h[0], h[0] += 19 * carry
126     h[1] += h[0] >> CURVE25519_51BITS; // process carry h[0] -> h[1]
127     h[0] &= CURVE25519_51BITS_MASK; // clear h[0]
128     h[2] += h[1] >> CURVE25519_51BITS; // process carry h[1] -> h[2]
129     h[1] &= CURVE25519_51BITS_MASK; // clear h[1]
130     h[3] += h[2] >> CURVE25519_51BITS; // process carry h[2] -> h[3]
131     h[2] &= CURVE25519_51BITS_MASK; // clear h[2]
132     h[4] += h[3] >> CURVE25519_51BITS; // process carry h[3] -> h[4]
133     h[3] &= CURVE25519_51BITS_MASK; // clear h[3]
134     h[4] &= CURVE25519_51BITS_MASK; // clear h[4]
135 
136     Fp51UnloadTo8Bits(out, h);
137 }
138 
139 /* out = in1 ^ (4 * 2 ^ (2 * times)) * in2 */
Fp51MultiSquare(Fp51 * in1,Fp51 * in2,Fp51 * out,int32_t times)140 static inline void Fp51MultiSquare(Fp51 *in1, Fp51 *in2, Fp51 *out, int32_t times)
141 {
142     int32_t i;
143     Fp51 temp1, temp2;
144     Fp51Square(&temp1, in1);
145     Fp51Square(&temp2, &temp1);
146     for (i = 0; i < times; i++) {
147         Fp51Square(&temp1, &temp2);
148         Fp51Square(&temp2, &temp1);
149     }
150     Fp51Mul(out, in2, &temp2);
151 }
152 
153 /* out = a ^ -1 */
Fp51Invert(Fp51 * out,const Fp51 * a)154 static void Fp51Invert(Fp51 *out, const Fp51 *a)
155 {
156     Fp51 a0;    /* save a^1         */
157     Fp51 a1;    /* save a^2         */
158     Fp51 a2;    /* save a^11        */
159     Fp51 a3;    /* save a^(2^5-1)   */
160     Fp51 a4;    /* save a^(2^10-1)  */
161     Fp51 a5;    /* save a^(2^20-1)  */
162     Fp51 a6;    /* save a^(2^40-1)  */
163     Fp51 a7;    /* save a^(2^50-1)  */
164     Fp51 a8;    /* save a^(2^100-1) */
165     Fp51 a9;    /* save a^(2^200-1) */
166     Fp51 a10;   /* save a^(2^250-1) */
167     Fp51 temp1, temp2;
168 
169     /* We know a×b=1(mod p), then a and b are inverses of mod p, i.e. a=b^(-1), b=a^(-1);
170      * According to Fermat's little theorem a^(p-1)=1(mod p), so a*a^(p-2)=1(mod p);
171      * So the inverse element of a is a^(-1) = a^(p-2)(mod p)
172      * Here it is, p=2^255-19, thus we need to compute a^(2^255-21)(mod(2^255-19))
173      */
174 
175     /* a^1 */
176     CURVE25519_FP51_COPY(a0.data, a->data);
177 
178     /* a^2 */
179     Fp51Square(&a1, &a0);
180 
181     /* a^4 */
182     Fp51Square(&temp1, &a1);
183 
184     /* a^8 */
185     Fp51Square(&temp2, &temp1);
186 
187     /* a^9 */
188     Fp51Mul(&temp1, &a0, &temp2);
189 
190     /* a^11 */
191     Fp51Mul(&a2, &a1, &temp1);
192 
193     /* a^22 */
194     Fp51Square(&temp2, &a2);
195 
196     /* a^(2^5-1) = a^(9+22) */
197     Fp51Mul(&a3, &temp1, &temp2);
198 
199     /* a^(2^10-1) = a^(2^10-2^5) * a^(2^5-1) */
200     Fp51Square(&temp1, &a3);
201     Fp51Square(&temp2, &temp1);
202     Fp51Square(&temp1, &temp2);
203     Fp51Square(&temp2, &temp1);
204     Fp51Square(&temp1, &temp2);
205     Fp51Mul(&a4, &a3, &temp1);
206 
207     /* a^(2^20-1) = a^(2^20-2^10) * a^(2^10-1) */
208     Fp51MultiSquare(&a4, &a4, &a5, 4); // (2 * 2) ^ 4
209 
210     /* a^(2^40-1) = a^(2^40-2^20) * a^(2^20-1) */
211     Fp51MultiSquare(&a5, &a5, &a6, 9); // (2 * 2) ^ 9
212 
213     /* a^(2^50-1) = a^(2^50-2^10) * a^(2^10-1) */
214     Fp51MultiSquare(&a6, &a4, &a7, 4); // (2 * 2) ^ 4
215 
216     /* a^(2^100-1) = a^(2^100-2^50) * a^(2^50-1) */
217     Fp51MultiSquare(&a7, &a7, &a8, 24); // (2 * 2) ^ 24
218 
219     /* a^(2^200-1) = a^(2^200-2^100) * a^(2^100-1) */
220     Fp51MultiSquare(&a8, &a8, &a9, 49); // (2 * 2) ^ 49
221 
222     /* a^(2^250-1) = a^(2^250-2^50) * a^(2^50-1) */
223     Fp51MultiSquare(&a9, &a7, &a10, 24); // (2 * 2) ^ 24
224 
225     /* a^(2^5*(2^250-1)) = (a^(2^250-1))^5 */
226     Fp51Square(&temp1, &a10);
227     Fp51Square(&temp2, &temp1);
228     Fp51Square(&temp1, &temp2);
229     Fp51Square(&temp2, &temp1);
230     Fp51Square(&temp1, &temp2);
231 
232     /* The output:a^(2^255-21) = a(2^5*(2^250-1)+11) = a^(2^5*(2^250-1)) * a^11 */
233     Fp51Mul(out, &a2, &temp1);
234 }
235 
Fp51ScalarMultiPoint(uint8_t out[32],const uint8_t scalar[32],const uint8_t point[32])236 void Fp51ScalarMultiPoint(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32])
237 {
238     uint8_t k[32];
239     const uint8_t *u = point;
240     int32_t t;
241     uint32_t swap;
242     uint32_t kTemp;
243     Fp51 x1, x2, x3;
244     Fp51 z2, z3;
245     Fp51 t1, t2;
246 
247     /* Decord the scalar into k */
248     CURVE25519_DECODE_LITTLE_ENDIAN(k, scalar);
249 
250     /* Reference RFC 7748 section 5:The constant a24 is (486662 - 2) / 4 = 121665 for curve25519/X25519 */
251     Fp51DataToPoly(&x1, u);
252     CURVE25519_FP51_SET(x2.data, 1);
253     CURVE25519_FP51_SET(z2.data, 0);
254     CURVE25519_FP51_COPY(x3.data, x1.data);
255     CURVE25519_FP51_SET(z3.data, 1);
256     swap = 0;
257 
258     /* "bits" parameter set to 255 for x25519  */ /* For t = bits-1(254) down to 0: */
259     for (t = 254; t >= 0; t--) {
260         /* t >> 3: calculation the index of bit; t & 7: Obtains the corresponding bit in the byte */
261         kTemp = (k[(uint32_t)t >> 3] >> ((uint32_t)t & 7)) & 1;           /* kTemp = (k >> t) & 1 */
262         swap ^= kTemp;                                /* swap ^= kTemp */
263         CURVE25519_FP51_CSWAP(swap, x2.data, x3.data);  /* (x_2, x_3) = cswap(swap, x_2, x_3) */
264 
265         CURVE25519_FP51_CSWAP(swap, z2.data, z3.data);  /* (z_2, z_3) = cswap(swap, z_2, z_3) */
266         swap = kTemp;                                 /* swap = kTemp */
267         CURVE25519_FP51_SUB(t1.data, x3.data, z3.data);                /* x3 = D */
268         CURVE25519_FP51_SUB(t2.data, x2.data, z2.data);                /* t2 = B */
269         CURVE25519_FP51_ADD(x2.data, x2.data, z2.data);                /* t1 = A */
270         CURVE25519_FP51_ADD(z2.data, x3.data, z3.data);                /* x2 = C */
271 
272         Fp51Mul(&z3, &t1, &x2);
273         Fp51Mul(&z2, &z2, &t2);
274         Fp51Square(&t1, &t2);
275         Fp51Square(&t2, &x2);
276 
277         CURVE25519_FP51_ADD(x3.data, z3.data, z2.data);
278         CURVE25519_FP51_SUB(z2.data, z3.data, z2.data);
279         Fp51Mul(&x2, &t2, &t1);
280         CURVE25519_FP51_SUB(t2.data, t2.data, t1.data);
281         Fp51Square(&z2, &z2);
282         Fp51MulScalar(&z3, &t2); // z2 *= 121665 + 1 = 121666
283         Fp51Square(&x3, &x3);
284         CURVE25519_FP51_ADD(t1.data, t1.data, z3.data);
285         Fp51Mul(&z3, &x1, &z2);
286         Fp51Mul(&z2, &t2, &t1);
287     }
288 
289     CURVE25519_FP51_CSWAP(swap, x2.data, x3.data);
290     CURVE25519_FP51_CSWAP(swap, z2.data, z3.data);
291     /* Return x2 * (z2 ^ (p - 2)) */
292     Fp51Invert(&t1, &z2);
293     Fp51Mul(&t2, &x2, &t1);
294     Fp51PolyToData(&t2, out);
295     (void)memset_s(k, sizeof(k), 0, sizeof(k));
296 }
297 
298 #ifdef HITLS_CRYPTO_X25519_X8664
299 
/* Low 63 bits set: clears the top bit of the most significant 64-bit word. */
#define CURVE25519_63BITS_MASK 0x7fffffffffffffff

/* Set the four-word element dst to the small value: word 0 = value, words 1..3 = 0. */
#define CURVE25519_FP64_SET(dst, value)     \
    do {                                    \
        (dst)[0] = (value);                 \
        (dst)[1] = 0;                       \
        (dst)[2] = 0;                       \
        (dst)[3] = 0;                       \
    } while (0)

/* Copy the four words of src into dst. */
#define CURVE25519_FP64_COPY(dst, src)     \
    do {                                   \
        (dst)[0] = (src)[0];               \
        (dst)[1] = (src)[1];               \
        (dst)[2] = (src)[2];               \
        (dst)[3] = (src)[3];               \
    } while (0)

/*
 * Assemble the 64-bit lvalue dst from the eight bytes at src, least significant
 * byte first. Both arguments are evaluated several times, so they must be free of
 * side effects. dst is parenthesised so that any lvalue expression is safe to pass.
 */
#define CURVE25519_BYTES8_LOAD(dst, src)                   \
    do {                                                   \
            (dst) =  (uint64_t)(src)[0];                   \
            (dst) |= ((uint64_t)(src)[1]) << 8;            \
            (dst) |= ((uint64_t)(src)[2]) << 16;           \
            (dst) |= ((uint64_t)(src)[3]) << 24;           \
            (dst) |= ((uint64_t)(src)[4]) << 32;           \
            (dst) |= ((uint64_t)(src)[5]) << 40;           \
            (dst) |= ((uint64_t)(src)[6]) << 48;           \
            (dst) |= ((uint64_t)(src)[7]) << 56;           \
    } while (0)

/*
 * Branch-free conditional swap of the four-word elements a and b.
 * s must be 0 or 1: 0 - s is then an all-zero or all-one mask, and XOR-ing the
 * masked difference into both operands either leaves them alone or exchanges them.
 */
#define CURVE25519_FP64_CSWAP(s, a, b)                                  \
    do {                                                                \
            uint64_t tt;                                                \
            const uint64_t tsMacro = 0 - (uint64_t)(s);                 \
            for (uint32_t ii = 0; ii < 4; ii++) {                       \
                tt = tsMacro & ((a)[ii] ^ (b)[ii]);                     \
                (a)[ii] = (a)[ii] ^ tt;                                 \
                (b)[ii] = (b)[ii] ^ tt;                                 \
            }                                                           \
    } while (0)
339 
/*
 * Load a 32-byte field element into four 64-bit words.
 *
 * h:     receives the words, least significant first; the top bit of h[3] is cleared.
 * point: 32 input bytes; only read, so it is used directly as a const pointer
 *        instead of being cast to a writable one.
 */
static void Fp64DataToPoly(Fp64 h, const uint8_t *point)
{
    CURVE25519_BYTES8_LOAD(h[0], point);       /* bytes 0..7   */
    CURVE25519_BYTES8_LOAD(h[1], point + 8);   /* bytes 8..15  */
    CURVE25519_BYTES8_LOAD(h[2], point + 16);  /* bytes 16..23 */
    CURVE25519_BYTES8_LOAD(h[3], point + 24);  /* bytes 24..31 */
    h[3] &= CURVE25519_63BITS_MASK;            /* drop bit 255 of the input */
}
353 
354 /* out = in1 ^ (4 * 2 ^ (2 * times)) * in2 */
Fp64MultiSqr(Fp64 in1,Fp64 in2,Fp64 out,int32_t times)355 static inline void Fp64MultiSqr(Fp64 in1, Fp64 in2, Fp64 out, int32_t times)
356 {
357     int32_t i;
358     Fp64 temp1, temp2;
359     Fp64Sqr(temp1, in1);
360     Fp64Sqr(temp2, temp1);
361     for (i = 0; i < times; i++) {
362         Fp64Sqr(temp1, temp2);
363         Fp64Sqr(temp2, temp1);
364     }
365     Fp64Mul(out, in2, temp2);
366 }
367 
/*
 * out = z ^ -1, computed as z ^ (2^255 - 21), i.e. z ^ (p - 2) for p = 2^255 - 19.
 *
 * The addition chain builds z^(2^k - 1) for k = 5, 10, 20, 40, 50, 100, 200, 250,
 * squares five more times and multiplies by z^11, because
 * 2^255 - 21 = 2^5 * (2^250 - 1) + 11.
 *
 * z is read only by the first few operations and out is written only by the last
 * one, so the caller may pass the same array for both.
 */
static void Fe64Invert(Fp64 out, const Fp64 z)
{
    Fp64 z2;     /* z^2             */
    Fp64 z9;     /* z^9             */
    Fp64 z11;    /* z^11            */
    Fp64 e5;     /* z^(2^5 - 1)     */
    Fp64 e10;    /* z^(2^10 - 1)    */
    Fp64 e20;    /* z^(2^20 - 1)    */
    Fp64 e40;    /* z^(2^40 - 1)    */
    Fp64 e50;    /* z^(2^50 - 1)    */
    Fp64 e100;   /* z^(2^100 - 1)   */
    Fp64 e200;   /* z^(2^200 - 1)   */
    Fp64 e250;   /* z^(2^250 - 1)   */
    Fp64 sqA, sqB; /* alternating buffers for runs of squarings */

    Fp64Sqr(z2, z);          /* z^2  */
    Fp64Sqr(sqA, z2);        /* z^4  */
    Fp64Sqr(sqB, sqA);       /* z^8  */
    Fp64Mul(z9, z, sqB);     /* z^9  = z * z^8 */
    Fp64Mul(z11, z2, z9);    /* z^11 = z^2 * z^9 */
    Fp64Sqr(sqA, z11);       /* z^22 */
    Fp64Mul(e5, z9, sqA);    /* z^31 = z^(2^5 - 1) */

    /* Five squarings give z^(2^10 - 2^5); times z^(2^5 - 1) gives z^(2^10 - 1). */
    Fp64Sqr(sqA, e5);
    Fp64Sqr(sqB, sqA);
    Fp64Sqr(sqA, sqB);
    Fp64Sqr(sqB, sqA);
    Fp64Sqr(sqA, sqB);
    Fp64Mul(e10, sqA, e5);

    /* Each call squares its first argument 2 * (times + 1) times, then multiplies. */
    Fp64MultiSqr(e10, e10, e20, 4);      /* 10 squarings:  z^(2^20 - 2^10)   * z^(2^10 - 1)  */
    Fp64MultiSqr(e20, e20, e40, 9);      /* 20 squarings:  z^(2^40 - 2^20)   * z^(2^20 - 1)  */
    Fp64MultiSqr(e40, e10, e50, 4);      /* 10 squarings:  z^(2^50 - 2^10)   * z^(2^10 - 1)  */
    Fp64MultiSqr(e50, e50, e100, 24);    /* 50 squarings:  z^(2^100 - 2^50)  * z^(2^50 - 1)  */
    Fp64MultiSqr(e100, e100, e200, 49);  /* 100 squarings: z^(2^200 - 2^100) * z^(2^100 - 1) */
    Fp64MultiSqr(e200, e50, e250, 24);   /* 50 squarings:  z^(2^250 - 2^50)  * z^(2^50 - 1)  */

    /* Five squarings: z^(2^5 * (2^250 - 1)). */
    Fp64Sqr(sqA, e250);
    Fp64Sqr(sqB, sqA);
    Fp64Sqr(sqA, sqB);
    Fp64Sqr(sqB, sqA);
    Fp64Sqr(sqA, sqB);

    /* z^(2^255 - 21) = z^11 * z^(2^5 * (2^250 - 1)) */
    Fp64Mul(out, z11, sqA);
}
420 
421 
Fp64ScalarMultiPoint(uint8_t out[32],const uint8_t scalar[32],const uint8_t point[32])422 void Fp64ScalarMultiPoint(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32])
423 {
424     uint8_t e[32];
425     uint32_t swap = 0;
426     int32_t t;
427     Fp64 x1, x2, x3;
428     Fp64 z2, z3;
429     Fp64 t1, t2;
430 
431     CURVE25519_DECODE_LITTLE_ENDIAN(e, scalar);
432     Fp64DataToPoly(x1, point);
433     CURVE25519_FP64_SET(x2, 1);
434     CURVE25519_FP64_SET(z2, 0);
435     CURVE25519_FP64_COPY(x3, x1);
436     CURVE25519_FP64_SET(z3, 1);
437 
438     for (t = 254; t >= 0; --t) { /* For t = bits-1(254) down to 0: */
439         /* t >> 3: calculation the index of bit; t & 7: Obtains the corresponding bit in the byte */
440         uint32_t kTemp = (e[(uint32_t)t >> 3] >> ((uint32_t)t & 7)) & 1;
441 
442         swap ^= kTemp;
443         CURVE25519_FP64_CSWAP(swap, x2, x3);
444         CURVE25519_FP64_CSWAP(swap, z2, z3);
445         swap = kTemp;
446         Fp64Sub(t1, x3, z3);
447         Fp64Sub(t2, x2, z2);
448         Fp64Add(x2, x2, z2);
449         Fp64Add(z2, x3, z3);
450         Fp64Mul(z3, x2, t1);
451         Fp64Mul(z2, z2, t2);
452         Fp64Sqr(t1, t2);
453         Fp64Sqr(t2, x2);
454         Fp64Add(x3, z3, z2);
455         Fp64Sub(z2, z3, z2);
456         Fp64Mul(x2, t2, t1);
457         Fp64Sub(t2, t2, t1);
458         Fp64Sqr(z2, z2);
459         Fp64MulScalar(z3, t2);
460         Fp64Sqr(x3, x3);
461         Fp64Add(t1, t1, z3);
462         Fp64Mul(z3, x1, z2);
463         Fp64Mul(z2, t2, t1);
464     }
465 
466     Fe64Invert(z2, z2);
467     Fp64Mul(x2, x2, z2);
468     Fp64PolyToData(out, x2);
469     (void)memset_s(e, sizeof(e), 0, sizeof(e));
470 }
471 #endif
472 
/*
 * X25519 entry point: out = scalar * point, choosing the implementation at run time.
 *
 * On x86-64 builds with HITLS_CRYPTO_X25519_X8664 the 64-bit-word implementation is
 * used when the CPU reports both BMI2 and ADX; every other case uses the 51-bit-limb
 * implementation.
 */
void ScalarMultiPoint(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32])
{
#if defined (__x86_64__) && defined (HITLS_CRYPTO_X25519_X8664)
    if (!IsSupportBMI2() || !IsSupportADX()) {
        Fp51ScalarMultiPoint(out, scalar, point);
    } else {
        Fp64ScalarMultiPoint(out, scalar, point);
    }
#else
    Fp51ScalarMultiPoint(out, scalar, point);
#endif
}
484 
485 #endif /* HITLS_CRYPTO_X25519 */
486