• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file is part of the openHiTLS project.
3  *
4  * openHiTLS is licensed under the Mulan PSL v2.
5  * You can use this software according to the terms and conditions of the Mulan PSL v2.
6  * You may obtain a copy of Mulan PSL v2 at:
7  *
8  *     http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  * See the Mulan PSL v2 for more details.
14  */
15 
16 #include "hitls_build.h"
17 #ifdef HITLS_CRYPTO_X25519
18 
19 #include "securec.h"
20 #include "curve25519_local.h"
21 
22 // X25519 alternative implementation, faster but require int128
23 #if (defined(__SIZEOF_INT128__) && (__SIZEOF_INT128__ == 16))
24 #define CURVE25519_51BITS_MASK 0x7ffffffffffff
25 #define CURVE25519_51BITS 51
26 
/*
 * Unpack a 32-byte encoding into five limbs of 51 bits each.
 *
 * out: receives the limbs in out->data[0..4], least significant limb first.
 * in:  the 32 input bytes.
 *
 * NOTE(review): assumes CURVE25519_BYTES6_LOAD / CURVE25519_BYTES7_LOAD read 6 / 7
 * little-endian bytes into the uint64_t they are given - confirm in curve25519_local.h.
 */
static void Fp51DataToPoly(Fp51 *out, const uint8_t in[32])
{
    uint64_t limb[5];
    uint32_t i;

    /* Overlapping byte windows; each one is shifted so its low bits are free for the spill below. */
    CURVE25519_BYTES7_LOAD(limb, in);

    CURVE25519_BYTES6_LOAD(limb + 1, in + 7);
    limb[1] <<= 5;

    CURVE25519_BYTES7_LOAD(limb + 2, in + 13);
    limb[2] <<= 2;

    CURVE25519_BYTES6_LOAD(limb + 3, in + 20);
    limb[3] <<= 7;

    CURVE25519_BYTES6_LOAD(limb + 4, in + 26);
    limb[4] &= 0x7fffffffffff; /* keep 47 of the 48 loaded bits */
    limb[4] <<= 4;

    /* Move everything above bit 51 of a limb into the free low bits of the next limb. */
    for (i = 0; i < 4; i++) {
        limb[i + 1] |= limb[i] >> CURVE25519_51BITS;
        limb[i] &= CURVE25519_51BITS_MASK;
    }

    for (i = 0; i < 5; i++) {
        out->data[i] = limb[i];
    }
}
64 
/*
 * Serialize five 51-bit limbs into 32 bytes, least significant byte first.
 *
 * out: receives the 32 bytes.
 * h:   the five limbs; limb i contributes starting at bit 51 * i of the output.
 *
 * Output byte n covers bits [8n, 8n + 8). When that window starts less than 8 bits
 * before the end of a limb, the missing high bits are taken from the low bits of
 * the next limb. All branching depends on the byte index only, never on limb values.
 */
static void Fp51UnloadTo8Bits(uint8_t out[32], uint64_t h[5])
{
    uint32_t idx;

    for (idx = 0; idx < 32; idx++) {
        const uint32_t bitPos = idx * 8;
        const uint32_t limb = bitPos / 51;   /* limb holding the first bit of this byte */
        const uint32_t shift = bitPos % 51;  /* offset of that bit inside the limb */
        uint64_t chunk = h[limb] >> shift;

        /* The byte straddles two limbs: pull the remainder from the next one. */
        if ((shift > 51 - 8) && (limb < 4)) {
            chunk |= h[limb + 1] << (51 - shift);
        }
        out[idx] = (uint8_t)chunk;
    }
}
105 
Fp51PolyToData(const Fp51 * in,uint8_t out[32])106 static void Fp51PolyToData(const Fp51 *in, uint8_t out[32])
107 {
108     uint64_t h[5];
109     h[0] = in->data[0];
110     h[1] = in->data[1];
111     h[2] = in->data[2];
112     h[3] = in->data[3];
113     h[4] = in->data[4];
114     uint64_t carry;
115 
116     carry = (h[0] + 19) >> CURVE25519_51BITS; // plus 19 then calculate carry
117     carry = (h[1] + carry) >> CURVE25519_51BITS;
118     carry = (h[2] + carry) >> CURVE25519_51BITS;
119     carry = (h[3] + carry) >> CURVE25519_51BITS;
120     carry = (h[4] + carry) >> CURVE25519_51BITS;
121 
122     h[0] += 19 * carry; // process carry h[4] -> h[0], h[0] += 19 * carry
123     h[1] += h[0] >> CURVE25519_51BITS;
124     h[0] &= CURVE25519_51BITS_MASK;
125     h[2] += h[1] >> CURVE25519_51BITS;
126     h[1] &= CURVE25519_51BITS_MASK;
127     h[3] += h[2] >> CURVE25519_51BITS;
128     h[2] &= CURVE25519_51BITS_MASK;
129     h[4] += h[3] >> CURVE25519_51BITS;
130     h[3] &= CURVE25519_51BITS_MASK;
131     h[4] &= CURVE25519_51BITS_MASK;
132 
133     Fp51UnloadTo8Bits(out, h);
134 }
135 
Fp51ProcessCarry(__uint128_t in[5])136 void Fp51ProcessCarry(__uint128_t in[5])
137 {
138     in[1] += (uint64_t)(in[0] >> CURVE25519_51BITS);
139     in[0] = (uint64_t)in[0] & CURVE25519_51BITS_MASK;
140 
141     in[2] += (uint64_t)(in[1] >> CURVE25519_51BITS);
142     in[1] = (uint64_t)in[1] & CURVE25519_51BITS_MASK;
143 
144     in[3] += (uint64_t)(in[2] >> CURVE25519_51BITS);
145     in[2] = (uint64_t)in[2] & CURVE25519_51BITS_MASK;
146 
147     in[4] += (uint64_t)(in[3] >> CURVE25519_51BITS);
148     in[3] = (uint64_t)in[3] & CURVE25519_51BITS_MASK;
149 
150     in[0] += (uint64_t)(in[4] >> CURVE25519_51BITS) * 19;
151     in[4] = (uint64_t)in[4] & CURVE25519_51BITS_MASK;
152 
153     in[1] += in[0] >> CURVE25519_51BITS;
154     in[0] &= CURVE25519_51BITS_MASK;
155 }
156 
/*
 * out = f * g on five-limb field elements, followed by one carry pass.
 *
 * out: receives the product limbs; it may be the same object as f or g because
 *      every input limb is read before out is written.
 * f, g: the operands.
 *
 * Cross terms whose limb indices sum to 5 or more are folded back with a factor of 19.
 */
void Fp51Mul(Fp51 *out, const Fp51 *f, const Fp51 *g)
{
    const uint64_t f0 = f->data[0];
    const uint64_t f1 = f->data[1];
    const uint64_t f2 = f->data[2];
    const uint64_t f3 = f->data[3];
    const uint64_t f4 = f->data[4];
    const uint64_t g0 = g->data[0];
    const uint64_t g1 = g->data[1];
    const uint64_t g2 = g->data[2];
    const uint64_t g3 = g->data[3];
    const uint64_t g4 = g->data[4];
    /* 19 * g_j, formed in 128 bits so the arithmetic matches a 128-bit f_i * g_j * 19 */
    const __uint128_t g1x19 = (__uint128_t)g1 * 19;
    const __uint128_t g2x19 = (__uint128_t)g2 * 19;
    const __uint128_t g3x19 = (__uint128_t)g3 * 19;
    const __uint128_t g4x19 = (__uint128_t)g4 * 19;
    __uint128_t acc[5];

    /* acc0 = f0g0 + 19 * (f1g4 + f2g3 + f3g2 + f4g1) */
    acc[0] = (__uint128_t)f0 * g0 + f1 * g4x19 + f2 * g3x19 + f3 * g2x19 + f4 * g1x19;
    /* acc1 = f0g1 + f1g0 + 19 * (f2g4 + f3g3 + f4g2) */
    acc[1] = (__uint128_t)f0 * g1 + (__uint128_t)f1 * g0 + f2 * g4x19 + f3 * g3x19 + f4 * g2x19;
    /* acc2 = f0g2 + f1g1 + f2g0 + 19 * (f3g4 + f4g3) */
    acc[2] = (__uint128_t)f0 * g2 + (__uint128_t)f1 * g1 + (__uint128_t)f2 * g0 + f3 * g4x19 + f4 * g3x19;
    /* acc3 = f0g3 + f1g2 + f2g1 + f3g0 + 19 * f4g4 */
    acc[3] = (__uint128_t)f0 * g3 + (__uint128_t)f1 * g2 + (__uint128_t)f2 * g1 + (__uint128_t)f3 * g0 +
        f4 * g4x19;
    /* acc4 = f0g4 + f1g3 + f2g2 + f3g1 + f4g0 */
    acc[4] = (__uint128_t)f0 * g4 + (__uint128_t)f1 * g3 + (__uint128_t)f2 * g2 + (__uint128_t)f3 * g1 +
        (__uint128_t)f4 * g0;

    Fp51ProcessCarry(acc);

    out->data[0] = (uint64_t)acc[0];
    out->data[1] = (uint64_t)acc[1];
    out->data[2] = (uint64_t)acc[2];
    out->data[3] = (uint64_t)acc[3];
    out->data[4] = (uint64_t)acc[4];
}
189 
/*
 * out = in * in on five-limb field elements, followed by one carry pass.
 *
 * out: receives the square; it may be the same object as in because every input
 *      limb is read before out is written.
 * in:  the operand.
 *
 * The doubled and 19-scaled limbs are formed in 64 bits before widening.
 */
void Fp51Square(Fp51 *out, const Fp51 *in)
{
    const uint64_t a0 = in->data[0];
    const uint64_t a1 = in->data[1];
    const uint64_t a2 = in->data[2];
    const uint64_t a3 = in->data[3];
    const uint64_t a4 = in->data[4];
    const uint64_t a0x2 = a0 * 2;
    const uint64_t a1x2 = a1 * 2;
    const uint64_t a2x2 = a2 * 2;
    const uint64_t a3x2 = a3 * 2;
    const uint64_t a3x19 = a3 * 19;
    const uint64_t a4x19 = a4 * 19;
    __uint128_t acc[5];

    /* acc0 = a0^2 + 38 * a1 * a4 + 38 * a2 * a3 */
    acc[0] = (__uint128_t)a0 * a0 + (__uint128_t)a1x2 * a4x19 + (__uint128_t)a2x2 * a3x19;
    /* acc1 = 2 * a0 * a1 + 19 * a3^2 + 38 * a2 * a4 */
    acc[1] = (__uint128_t)a0x2 * a1 + (__uint128_t)a3 * a3x19 + (__uint128_t)a2x2 * a4x19;
    /* acc2 = 2 * a0 * a2 + a1^2 + 38 * a3 * a4 */
    acc[2] = (__uint128_t)a0x2 * a2 + (__uint128_t)a1 * a1 + (__uint128_t)a3x2 * a4x19;
    /* acc3 = 2 * a0 * a3 + 19 * a4^2 + 2 * a1 * a2 */
    acc[3] = (__uint128_t)a0x2 * a3 + (__uint128_t)a4 * a4x19 + (__uint128_t)a1x2 * a2;
    /* acc4 = 2 * a0 * a4 + 2 * a1 * a3 + a2^2 */
    acc[4] = (__uint128_t)a0x2 * a4 + (__uint128_t)a1x2 * a3 + (__uint128_t)a2 * a2;

    Fp51ProcessCarry(acc);

    out->data[0] = (uint64_t)acc[0];
    out->data[1] = (uint64_t)acc[1];
    out->data[2] = (uint64_t)acc[2];
    out->data[3] = (uint64_t)acc[3];
    out->data[4] = (uint64_t)acc[4];
}
223 
/*
 * out = in * scalar on a five-limb field element, followed by one carry pass.
 *
 * out:    receives the scaled element; it may be the same object as in.
 * in:     the operand.
 * scalar: 32-bit multiplier applied to every limb in 128-bit arithmetic.
 */
void Fp51MulScalar(Fp51 *out, const Fp51 *in, const uint32_t scalar)
{
    const __uint128_t factor = (__uint128_t)scalar;
    __uint128_t acc[5];
    uint32_t i;

    for (i = 0; i < 5; i++) {
        acc[i] = in->data[i] * factor;
    }

    Fp51ProcessCarry(acc);

    for (i = 0; i < 5; i++) {
        out->data[i] = (uint64_t)acc[i];
    }
}
241 
242 /* out = in1 ^ (4 * 2 ^ (2 * times)) * in2 */
Fp51MultiSquare(Fp51 * in1,Fp51 * in2,Fp51 * out,int32_t times)243 static inline void Fp51MultiSquare(Fp51 *in1, Fp51 *in2, Fp51 *out, int32_t times)
244 {
245     int32_t i;
246     Fp51 temp1, temp2;
247     Fp51Square(&temp1, in1);
248     Fp51Square(&temp2, &temp1);
249     for (i = 0; i < times; i++) {
250         Fp51Square(&temp1, &temp2);
251         Fp51Square(&temp2, &temp1);
252     }
253     Fp51Mul(out, in2, &temp2);
254 }
255 
256 /* out = a ^ -1 */
Fp51Invert(Fp51 * out,const Fp51 * a)257 static void Fp51Invert(Fp51 *out, const Fp51 *a)
258 {
259     Fp51 a0;    /* save a^1         */
260     Fp51 a1;    /* save a^2         */
261     Fp51 a2;    /* save a^11        */
262     Fp51 a3;    /* save a^(2^5-1)   */
263     Fp51 a4;    /* save a^(2^10-1)  */
264     Fp51 a5;    /* save a^(2^20-1)  */
265     Fp51 a6;    /* save a^(2^40-1)  */
266     Fp51 a7;    /* save a^(2^50-1)  */
267     Fp51 a8;    /* save a^(2^100-1) */
268     Fp51 a9;    /* save a^(2^200-1) */
269     Fp51 a10;   /* save a^(2^250-1) */
270     Fp51 temp1, temp2;
271 
272     /* We know a×b=1(mod p), then a and b are inverses of mod p, i.e. a=b^(-1), b=a^(-1);
273      * According to Fermat's little theorem a^(p-1)=1(mod p), so a*a^(p-2)=1(mod p);
274      * So the inverse element of a is a^(-1) = a^(p-2)(mod p)
275      * Here it is, p=2^255-19, thus we need to compute a^(2^255-21)(mod(2^255-19))
276      */
277 
278     /* a^1 */
279     CURVE25519_FP51_COPY(a0.data, a->data);
280 
281     /* a^2 */
282     Fp51Square(&a1, &a0);
283 
284     /* a^4 */
285     Fp51Square(&temp1, &a1);
286 
287     /* a^8 */
288     Fp51Square(&temp2, &temp1);
289 
290     /* a^9 */
291     Fp51Mul(&temp1, &a0, &temp2);
292 
293     /* a^11 */
294     Fp51Mul(&a2, &a1, &temp1);
295 
296     /* a^22 */
297     Fp51Square(&temp2, &a2);
298 
299     /* a^(2^5-1) = a^(9+22) */
300     Fp51Mul(&a3, &temp1, &temp2);
301 
302     /* a^(2^10-1) = a^(2^10-2^5) * a^(2^5-1) */
303     Fp51Square(&temp1, &a3);
304     Fp51Square(&temp2, &temp1);
305     Fp51Square(&temp1, &temp2);
306     Fp51Square(&temp2, &temp1);
307     Fp51Square(&temp1, &temp2);
308     Fp51Mul(&a4, &a3, &temp1);
309 
310     /* a^(2^20-1) = a^(2^20-2^10) * a^(2^10-1) */
311     Fp51MultiSquare(&a4, &a4, &a5, 4); // (2 * 2) ^ 4
312 
313     /* a^(2^40-1) = a^(2^40-2^20) * a^(2^20-1) */
314     Fp51MultiSquare(&a5, &a5, &a6, 9); // (2 * 2) ^ 9
315 
316     /* a^(2^50-1) = a^(2^50-2^10) * a^(2^10-1) */
317     Fp51MultiSquare(&a6, &a4, &a7, 4); // (2 * 2) ^ 4
318 
319     /* a^(2^100-1) = a^(2^100-2^50) * a^(2^50-1) */
320     Fp51MultiSquare(&a7, &a7, &a8, 24); // (2 * 2) ^ 24
321 
322     /* a^(2^200-1) = a^(2^200-2^100) * a^(2^100-1) */
323     Fp51MultiSquare(&a8, &a8, &a9, 49); // (2 * 2) ^ 49
324 
325     /* a^(2^250-1) = a^(2^250-2^50) * a^(2^50-1) */
326     Fp51MultiSquare(&a9, &a7, &a10, 24); // (2 * 2) ^ 24
327 
328     /* a^(2^5*(2^250-1)) = (a^(2^250-1))^5 */
329     Fp51Square(&temp1, &a10);
330     Fp51Square(&temp2, &temp1);
331     Fp51Square(&temp1, &temp2);
332     Fp51Square(&temp2, &temp1);
333     Fp51Square(&temp1, &temp2);
334 
335     /* The output: a^(2^255-21) = a(2^5*(2^250-1)+11) = a^(2^5*(2^250-1)) * a^11 */
336     Fp51Mul(out, &a2, &temp1);
337 }
338 
ScalarMultiPoint(uint8_t out[32],const uint8_t scalar[32],const uint8_t point[32])339 void ScalarMultiPoint(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32])
340 {
341     uint8_t k[32];
342     const uint8_t *u = point;
343     int32_t t;
344     uint32_t swap;
345     uint32_t kTemp;
346     Fp51 x1, x2, x3;
347     Fp51 z2, z3;
348     Fp51 t1, t2;
349 
350     /* Decord the scalar into k */
351     CURVE25519_DECODE_LITTLE_ENDIAN(k, scalar);
352 
353     /* Reference RFC 7748 section 5: The constant a24 is (486662 - 2) / 4 = 121665 for curve25519/X25519 */
354     Fp51DataToPoly(&x1, u);
355     CURVE25519_FP51_SET(x2.data, 1);
356     CURVE25519_FP51_SET(z2.data, 0);
357     CURVE25519_FP51_COPY(x3.data, x1.data);
358     CURVE25519_FP51_SET(z3.data, 1);
359     swap = 0;
360 
361     /* "bits" parameter set to 255 for x25519  */ /* For t = bits-1(254) down to 0: */
362     for (t = 254; t >= 0; t--) {
363         /* t >> 3: calculation the index of bit; t & 7: Obtains the corresponding bit in the byte */
364         kTemp = (k[(uint32_t)t >> 3] >> ((uint32_t)t & 7)) & 1;           /* kTemp = (k >> t) & 1 */
365         swap ^= kTemp;                                /* swap ^= kTemp */
366         CURVE25519_FP51_CSWAP(swap, x2.data, x3.data);  /* (x_2, x_3) = cswap(swap, x_2, x_3) */
367         CURVE25519_FP51_CSWAP(swap, z2.data, z3.data);  /* (z_2, z_3) = cswap(swap, z_2, z_3) */
368         swap = kTemp;                                 /* swap = kTemp */
369         CURVE25519_FP51_SUB(t1.data, x3.data, z3.data);                /* x3 = D */
370         CURVE25519_FP51_SUB(t2.data, x2.data, z2.data);                /* t2 = B */
371         CURVE25519_FP51_ADD(x2.data, x2.data, z2.data);                /* t1 = A */
372         CURVE25519_FP51_ADD(z2.data, x3.data, z3.data);                /* x2 = C */
373 
374         Fp51Mul(&z3, &t1, &x2);
375         Fp51Mul(&z2, &z2, &t2);
376         Fp51Square(&t1, &t2);
377         Fp51Square(&t2, &x2);
378 
379         CURVE25519_FP51_ADD(x3.data, z3.data, z2.data);
380         CURVE25519_FP51_SUB(z2.data, z3.data, z2.data);
381         Fp51Mul(&x2, &t2, &t1);
382         CURVE25519_FP51_SUB(t2.data, t2.data, t1.data);
383         Fp51Square(&z2, &z2);
384         Fp51MulScalar(&z3, &t2, 121666); // z2 *= 121665 + 1 = 121666
385         Fp51Square(&x3, &x3);
386         CURVE25519_FP51_ADD(t1.data, t1.data, z3.data);
387         Fp51Mul(&z3, &x1, &z2);
388         Fp51Mul(&z2, &t2, &t1);
389     }
390 
391     CURVE25519_FP51_CSWAP(swap, x2.data, x3.data);
392     CURVE25519_FP51_CSWAP(swap, z2.data, z3.data);
393     /* Return x2 * (z2 ^ (p - 2)) */
394     Fp51Invert(&t1, &z2);
395     Fp51Mul(&t2, &x2, &t1);
396     Fp51PolyToData(&t2, out);
397     BSL_SAL_CleanseData(k, sizeof(k));
398 }
399 
400 #else
401 
/*
 * out = p * scalar on a ten-limb field element, followed by carry processing.
 *
 * out:    receives the ten result limbs, each narrowed to int32_t.
 * p:      the operand.
 * scalar: 32-bit multiplier.
 *
 * Limbs use the radix 2^25.5 layout
 *   f0 + 2^26*f1 + 2^51*f2 + 2^77*f3 + 2^102*f4 + 2^128*f5 + 2^153*f6 + 2^179*f7 + 2^204*f8 + 2^230*f9
 * so odd limbs are 25 bits wide and even limbs 26 bits wide.
 * NOTE(review): the exact rounding done by PROCESS_CARRY, MASK_HIGH64 and
 * CURVE25519_MASK_HIGH_39 is defined outside this file section - confirm there.
 * The 64-bit scratch limbs are zeroed before returning.
 */
void FpMulScalar(Fp25 out, const Fp25 p, const int32_t scalar)
{
    uint64_t limb[10];
    uint64_t over;
    uint64_t wrap;
    uint64_t maskOdd;
    uint64_t maskEven;
    int64_t factor = (int64_t)scalar;
    uint32_t i;

    /* Products may exceed 32 bits but fit in 64 bits */
    CURVE25519_FP_MUL_SCALAR(limb, p, factor);

    /* Limb 9 -> limb 0: the carry out of the top limb re-enters at the bottom times 19 */
    over = limb[9] + (1 << 24);
    maskOdd = MASK_HIGH64(25) & (-((over) >> 63)); /* fill for the shifted-out sign bit */
    wrap = (over >> 25) | maskOdd;
    limb[0] += (wrap + (wrap << 1) + (wrap << 4)); /* 19 = 1 + 2^1 + 2^4 */
    limb[9] -= CURVE25519_MASK_HIGH_39 & over;

    /* Odd (25-bit) limbs into their successors: 1->2, 3->4, 5->6, 7->8 */
    for (i = 1; i < 9; i += 2) {
        PROCESS_CARRY(limb[i], limb[i + 1], maskOdd, over, 24);
    }

    /* Even (26-bit) limbs into their successors: 0->1, 2->3, 4->5, 6->7, 8->9 */
    for (i = 0; i < 9; i += 2) {
        PROCESS_CARRY(limb[i], limb[i + 1], maskEven, over, 25);
    }

    /* After carrying, every limb fits in 32 bits */
    for (i = 0; i < 10; i++) {
        out[i] = (int32_t)limb[i];
    }

    (void)memset_s(limb, sizeof(limb), 0, sizeof(limb));
}
475 
ScalarMultiPoint(uint8_t out[32],const uint8_t scalar[32],const uint8_t point[32])476 void ScalarMultiPoint(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32])
477 {
478     uint8_t k[32];
479     const uint8_t *u = point;
480     int32_t t;
481     uint32_t swap;
482     uint32_t kTemp;
483     Fp25 x1, x2, x3, z2, z3, t1, t2, t3;
484 
485     /* Decord the scalar into k */
486     CURVE25519_DECODE_LITTLE_ENDIAN(k, scalar);
487 
488     /* Reference RFC 7748 section 5:The constant a24 is (486662 - 2) / 4 = 121665 for curve25519/X25519 */
489     DataToPolynomial(x1, u);
490     CURVE25519_FP_SET(x2, 1);
491     CURVE25519_FP_SET(z2, 0);
492     CURVE25519_FP_COPY(x3, x1);
493     CURVE25519_FP_SET(z3, 1);
494     swap = 0;
495 
496     /* "bits" parameter set to 255 for x25519  */ /* For t = bits-1(254) down to 0: */
497     for (t = 254; t >= 0; t--) {
498         /* t >> 3: calculation the index of bit; t & 7: Obtains the corresponding bit in the byte */
499         kTemp = (k[(uint32_t)t >> 3] >> ((uint32_t)t & 7)) & 1;           /* kTemp = (k >> t) & 1 */
500         swap ^= kTemp;                                /* swap ^= kTemp */
501         CURVE25519_FP_CSWAP(swap, x2, x3);  /* (x_2, x_3) = cswap(swap, x_2, x_3) */
502         CURVE25519_FP_CSWAP(swap, z2, z3);  /* (z_2, z_3) = cswap(swap, z_2, z_3) */
503         swap = kTemp;                                 /* swap = kTemp */
504         CURVE25519_FP_ADD(t1, x2, z2);                /* t1 = A */
505         CURVE25519_FP_SUB(t2, x2, z2);                /* t2 = B */
506         CURVE25519_FP_ADD(x2, x3, z3);                /* x2 = C */
507         CURVE25519_FP_SUB(x3, x3, z3);                /* x3 = D */
508         FpMul(z2, x3, t1);             /* z2 = DA */
509         FpMul(z3, x2, t2);             /* z3 = CB */
510         FpSquareDoubleCore(t1, t1, false);               /* t1 = AA */
511         FpSquareDoubleCore(t2, t2, false);               /* t2 = BB */
512         CURVE25519_FP_SUB(t3, t1, t2);                /* t3 = E = AA - BB */
513         CURVE25519_FP_ADD(x3, z2, z3);                /* x3 = DA + CB */
514         FpSquareDoubleCore(x3, x3, false);             /* x3 = (DA + CB)^2 */
515         CURVE25519_FP_SUB(z3, z2, z3);                /* z3 = DA - CB */
516         FpSquareDoubleCore(z3, z3, false);             /* z3 = (DA - CB)^2 */
517         FpMul(z3, x1, z3);            /* z3 = x1 * (DA - CB)^2 */
518         FpMul(x2, t1, t2);            /* x2 = AA * BB */
519         FpMul(t1, t3, t1);            /* t1 = E * AA */
520         FpSquareDoubleCore(z2, t3, false);             /* z2 = E^2 */
521         /* Reference RFC 7748 section 5:The constant a24 is (486662 - 2) / 4 = 121665 for curve25519/X25519 */
522         FpMulScalar(z2, z2, 121665);  /* z2 = a24 * E^2 */
523         CURVE25519_FP_ADD(z2, t1, z2);                /* z2 = E * (AA + a24 * E) */
524     }
525 
526     CURVE25519_FP_CSWAP(swap, x2, x3);
527     CURVE25519_FP_CSWAP(swap, z2, z3);
528     /* Return x2 * (z2 ^ (p - 2)) */
529     FpInvert(t1, z2);
530     FpMul(t2, x2, t1);
531     PolynomialToData(out, t2);
532 }
533 #endif // uint128
534 #endif /* HITLS_CRYPTO_X25519 */
535