• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /**
2   * \file bn_mul.h
3   *
4   * \brief Multi-precision integer library
5   */
6  /*
7   *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
8   *  SPDX-License-Identifier: Apache-2.0
9   *
10   *  Licensed under the Apache License, Version 2.0 (the "License"); you may
11   *  not use this file except in compliance with the License.
12   *  You may obtain a copy of the License at
13   *
14   *  http://www.apache.org/licenses/LICENSE-2.0
15   *
16   *  Unless required by applicable law or agreed to in writing, software
17   *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
18   *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19   *  See the License for the specific language governing permissions and
20   *  limitations under the License.
21   *
22   *  This file is part of mbed TLS (https://tls.mbed.org)
23   */
24  /*
25   *      Multiply source vector [s] with b, add result
26   *       to destination vector [d] and set carry c.
27   *
28   *      Currently supports:
29   *
30   *         . IA-32 (386+)         . AMD64 / EM64T
31   *         . IA-32 (SSE2)         . Motorola 68020 / CPU32
32   *         . PowerPC, 32-bit      . MicroBlaze
33   *         . PowerPC, 64-bit      . TriCore
34   *         . SPARC v8 (disabled)  . ARM v3+
35   *         . Alpha                . MIPS32
36   *         . C, longlong          . C, generic
37   */
38  #ifndef MBEDTLS_BN_MUL_H
39  #define MBEDTLS_BN_MUL_H
40  
41  #if !defined(MBEDTLS_CONFIG_FILE)
42  #include "config.h"
43  #else
44  #include MBEDTLS_CONFIG_FILE
45  #endif
46  
47  #include "bignum.h"
48  
49  #if defined(MBEDTLS_HAVE_ASM)
50  
/* The macros below spell the keyword as `asm`; unless `asm` is already a macro, map it to `__asm`. */
51  #ifndef asm
52  #define asm __asm
53  #endif
54  
55  /* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
56  #if defined(__GNUC__) && \
57      ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
58  
59  /*
60   * Disable use of the i386 assembly code below when option -O0, which
61   * disables all compiler optimisations, is passed. This is detected through
62   * __OPTIMIZE__ not being defined. It is done because the number of registers
63   * used in the assembly code doesn't work with the -O0 option.
64   */
65  #if defined(__i386__) && defined(__OPTIMIZE__)
66  
/*
 * i386: opens the asm statement. Saves %ebx into operand %0, then loads
 * operands %5, %6, %7 and %8 into %esi, %edi, %ecx and %ebx. With the
 * constraint lists of the matching MULADDC_STOP these operands are t (save
 * slot), s, d, c and b. The statement is left open; MULADDC_STOP closes it.
 */
67  #define MULADDC_INIT                        \
68      asm(                                    \
69          "movl   %%ebx, %0           \n\t"   \
70          "movl   %5, %%esi           \n\t"   \
71          "movl   %6, %%edi           \n\t"   \
72          "movl   %7, %%ecx           \n\t"   \
73          "movl   %8, %%ebx           \n\t"
74  
/*
 * i386: one 32-bit limb step. lodsl fetches the next source word via %esi
 * into %eax, mull multiplies it by %ebx into %edx:%eax, then the carry in
 * %ecx and the destination word at (%edi) are added with carries propagated
 * into %edx. %edx becomes the new carry in %ecx and stosl stores %eax via %edi.
 */
75  #define MULADDC_CORE                        \
76          "lodsl                      \n\t"   \
77          "mull   %%ebx               \n\t"   \
78          "addl   %%ecx,   %%eax      \n\t"   \
79          "adcl   $0,      %%edx      \n\t"   \
80          "addl   (%%edi), %%eax      \n\t"   \
81          "adcl   $0,      %%edx      \n\t"   \
82          "movl   %%edx,   %%ecx      \n\t"   \
83          "stosl                      \n\t"
84  
85  #if defined(MBEDTLS_HAVE_SSE2)
86  
/*
 * i386 with MBEDTLS_HAVE_SSE2: handles eight 32-bit limbs per expansion
 * using the MMX registers (%mm0 holds the multiplier from %ebx, %mm1 the
 * running carry from %ecx). Source words are read at offsets 0..28 from
 * (%esi), multiplied with pmuludq and accumulated with paddq into the
 * destination words at offsets 0..28 from (%edi). Both pointers are then
 * advanced by 32 bytes and the final carry is moved from %mm1 back to %ecx.
 */
87  #define MULADDC_HUIT                            \
88          "movd     %%ecx,     %%mm1      \n\t"   \
89          "movd     %%ebx,     %%mm0      \n\t"   \
90          "movd     (%%edi),   %%mm3      \n\t"   \
91          "paddq    %%mm3,     %%mm1      \n\t"   \
92          "movd     (%%esi),   %%mm2      \n\t"   \
93          "pmuludq  %%mm0,     %%mm2      \n\t"   \
94          "movd     4(%%esi),  %%mm4      \n\t"   \
95          "pmuludq  %%mm0,     %%mm4      \n\t"   \
96          "movd     8(%%esi),  %%mm6      \n\t"   \
97          "pmuludq  %%mm0,     %%mm6      \n\t"   \
98          "movd     12(%%esi), %%mm7      \n\t"   \
99          "pmuludq  %%mm0,     %%mm7      \n\t"   \
100          "paddq    %%mm2,     %%mm1      \n\t"   \
101          "movd     4(%%edi),  %%mm3      \n\t"   \
102          "paddq    %%mm4,     %%mm3      \n\t"   \
103          "movd     8(%%edi),  %%mm5      \n\t"   \
104          "paddq    %%mm6,     %%mm5      \n\t"   \
105          "movd     12(%%edi), %%mm4      \n\t"   \
106          "paddq    %%mm4,     %%mm7      \n\t"   \
107          "movd     %%mm1,     (%%edi)    \n\t"   \
108          "movd     16(%%esi), %%mm2      \n\t"   \
109          "pmuludq  %%mm0,     %%mm2      \n\t"   \
110          "psrlq    $32,       %%mm1      \n\t"   \
111          "movd     20(%%esi), %%mm4      \n\t"   \
112          "pmuludq  %%mm0,     %%mm4      \n\t"   \
113          "paddq    %%mm3,     %%mm1      \n\t"   \
114          "movd     24(%%esi), %%mm6      \n\t"   \
115          "pmuludq  %%mm0,     %%mm6      \n\t"   \
116          "movd     %%mm1,     4(%%edi)   \n\t"   \
117          "psrlq    $32,       %%mm1      \n\t"   \
118          "movd     28(%%esi), %%mm3      \n\t"   \
119          "pmuludq  %%mm0,     %%mm3      \n\t"   \
120          "paddq    %%mm5,     %%mm1      \n\t"   \
121          "movd     16(%%edi), %%mm5      \n\t"   \
122          "paddq    %%mm5,     %%mm2      \n\t"   \
123          "movd     %%mm1,     8(%%edi)   \n\t"   \
124          "psrlq    $32,       %%mm1      \n\t"   \
125          "paddq    %%mm7,     %%mm1      \n\t"   \
126          "movd     20(%%edi), %%mm5      \n\t"   \
127          "paddq    %%mm5,     %%mm4      \n\t"   \
128          "movd     %%mm1,     12(%%edi)  \n\t"   \
129          "psrlq    $32,       %%mm1      \n\t"   \
130          "paddq    %%mm2,     %%mm1      \n\t"   \
131          "movd     24(%%edi), %%mm5      \n\t"   \
132          "paddq    %%mm5,     %%mm6      \n\t"   \
133          "movd     %%mm1,     16(%%edi)  \n\t"   \
134          "psrlq    $32,       %%mm1      \n\t"   \
135          "paddq    %%mm4,     %%mm1      \n\t"   \
136          "movd     28(%%edi), %%mm5      \n\t"   \
137          "paddq    %%mm5,     %%mm3      \n\t"   \
138          "movd     %%mm1,     20(%%edi)  \n\t"   \
139          "psrlq    $32,       %%mm1      \n\t"   \
140          "paddq    %%mm6,     %%mm1      \n\t"   \
141          "movd     %%mm1,     24(%%edi)  \n\t"   \
142          "psrlq    $32,       %%mm1      \n\t"   \
143          "paddq    %%mm3,     %%mm1      \n\t"   \
144          "movd     %%mm1,     28(%%edi)  \n\t"   \
145          "addl     $32,       %%edi      \n\t"   \
146          "addl     $32,       %%esi      \n\t"   \
147          "psrlq    $32,       %%mm1      \n\t"   \
148          "movd     %%mm1,     %%ecx      \n\t"
149  
/*
 * i386 with MBEDTLS_HAVE_SSE2: closes the asm statement opened by
 * MULADDC_INIT.
 *
 * Executes emms (MULADDC_HUIT uses the MMX registers), restores %ebx from
 * the save slot t, and writes %ecx, %edi and %esi back to c, d and s.
 *
 * Operands: %0..%3 are the outputs t, c, d, s; %4..%8 are the inputs
 * t, s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * s and reads/writes memory through d, and no operand describes those
 * accesses; without it the compiler may keep such memory cached in registers
 * or move accesses across the statement.
 */
#define MULADDC_STOP                    \
        "emms                   \n\t"   \
        "movl   %4, %%ebx       \n\t"   \
        "movl   %%ecx, %1       \n\t"   \
        "movl   %%edi, %2       \n\t"   \
        "movl   %%esi, %3       \n\t"   \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
        : "eax", "ebx", "ecx", "edx", "esi", "edi",     \
          "memory"                                      \
    );
160  
161  #else
162  
/*
 * i386 without SSE2: closes the asm statement opened by MULADDC_INIT.
 *
 * Restores %ebx from the save slot t and writes %ecx, %edi and %esi back
 * to c, d and s.
 *
 * Operands: %0..%3 are the outputs t, c, d, s; %4..%8 are the inputs
 * t, s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * s and reads/writes memory through d, and no operand describes those
 * accesses; without it the compiler may keep such memory cached in registers
 * or move accesses across the statement.
 */
#define MULADDC_STOP                    \
        "movl   %4, %%ebx       \n\t"   \
        "movl   %%ecx, %1       \n\t"   \
        "movl   %%edi, %2       \n\t"   \
        "movl   %%esi, %3       \n\t"   \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s)        \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b)   \
        : "eax", "ebx", "ecx", "edx", "esi", "edi",     \
          "memory"                                      \
    );
172  #endif /* SSE2 */
173  #endif /* i386 */
174  
175  #if defined(__amd64__) || defined (__x86_64__)
176  
/*
 * x86-64: opens the asm statement and zeroes %r8, which MULADDC_CORE copies
 * into %rcx as a zero. The operands are bound to fixed registers by the
 * constraints in the matching MULADDC_STOP, so nothing is loaded here.
 */
177  #define MULADDC_INIT                        \
178      asm(                                    \
179          "xorq   %%r8, %%r8\n"
180  
/*
 * x86-64: one 64-bit limb step. Loads the source limb at (%rsi), multiplies
 * it by %rbx into %rdx:%rax, adds the incoming carry %rcx, adds the low half
 * into the destination limb at (%rdi), and leaves the new carry (high half
 * plus carry bits) in %rcx. %rsi and %rdi are each advanced by 8 bytes.
 */
181  #define MULADDC_CORE                        \
182          "movq   (%%rsi), %%rax\n"           \
183          "mulq   %%rbx\n"                    \
184          "addq   $8, %%rsi\n"                \
185          "addq   %%rcx, %%rax\n"             \
186          "movq   %%r8, %%rcx\n"              \
187          "adcq   $0, %%rdx\n"                \
188          "nop    \n"                         \
189          "addq   %%rax, (%%rdi)\n"           \
190          "adcq   %%rdx, %%rcx\n"             \
191          "addq   $8, %%rdi\n"
192  
/*
 * x86-64: closes the asm statement opened by MULADDC_INIT.
 *
 * Binds the operands to fixed registers: c is read/written in %rcx ("+c"),
 * d in %rdi ("+D"), s in %rsi ("+S"), and b is an input in %rbx ("b").
 * %rax, %rdx and %r8 are scratch registers.
 *
 * "memory" is listed as a clobber because the asm body loads from (%rsi)
 * and updates (%rdi), and no operand describes those accesses; without it
 * the compiler may keep such memory cached in registers or move accesses
 * across the statement.
 */
#define MULADDC_STOP                        \
        : "+c" (c), "+D" (d), "+S" (s)      \
        : "b" (b)                           \
        : "rax", "rdx", "r8", "memory"      \
    );
198  
199  #endif /* AMD64 */
200  
201  #if defined(__mc68020__) || defined(__mcpu32__)
202  
/*
 * m68k: opens the asm statement and loads operands %3..%6 into %a2, %a3,
 * %d3 and %d2 (s, d, c and b per the constraint lists of the matching
 * MULADDC_STOP), then sets %d0 to zero for use as the addxl zero operand.
 */
203  #define MULADDC_INIT                    \
204      asm(                                \
205          "movl   %3, %%a2        \n\t"   \
206          "movl   %4, %%a3        \n\t"   \
207          "movl   %5, %%d3        \n\t"   \
208          "movl   %6, %%d2        \n\t"   \
209          "moveq  #0, %%d0        \n\t"
210  
/*
 * m68k: one 32-bit limb step. Loads the next source word via %a2 with
 * post-increment, multiplies it by %d2 into %d4:%d1, adds the carry %d3,
 * adds the low word into the destination word via %a3 with post-increment,
 * and accumulates the new carry in %d3.
 */
211  #define MULADDC_CORE                    \
212          "movel  %%a2@+, %%d1    \n\t"   \
213          "mulul  %%d2, %%d4:%%d1 \n\t"   \
214          "addl   %%d3, %%d1      \n\t"   \
215          "addxl  %%d0, %%d4      \n\t"   \
216          "moveq  #0,   %%d3      \n\t"   \
217          "addl   %%d1, %%a3@+    \n\t"   \
218          "addxl  %%d4, %%d3      \n\t"
219  
/*
 * m68k: closes the asm statement opened by MULADDC_INIT.
 *
 * Writes %d3, %a3 and %a2 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * %a2 (s) and updates memory through %a3 (d), and no operand describes those
 * accesses.
 */
#define MULADDC_STOP                    \
        "movl   %%d3, %0        \n\t"   \
        "movl   %%a3, %1        \n\t"   \
        "movl   %%a2, %2        \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "d0", "d1", "d2", "d3", "d4", "a2", "a3", \
          "memory"                                  \
    );
228  
/*
 * m68k: unrolled sequence of eight load / multiply / add-and-store steps
 * (one per 32-bit limb). Each step loads a source word via %a2 with
 * post-increment, multiplies it by %d2, and adds the low word into the
 * destination via %a3 with post-increment. The high-word register alternates
 * between %d4 and %d3 from step to step; the final addxl folds the last
 * extend bit into %d3.
 */
229  #define MULADDC_HUIT                        \
230          "movel  %%a2@+,  %%d1       \n\t"   \
231          "mulul  %%d2,    %%d4:%%d1  \n\t"   \
232          "addxl  %%d3,    %%d1       \n\t"   \
233          "addxl  %%d0,    %%d4       \n\t"   \
234          "addl   %%d1,    %%a3@+     \n\t"   \
235          "movel  %%a2@+,  %%d1       \n\t"   \
236          "mulul  %%d2,    %%d3:%%d1  \n\t"   \
237          "addxl  %%d4,    %%d1       \n\t"   \
238          "addxl  %%d0,    %%d3       \n\t"   \
239          "addl   %%d1,    %%a3@+     \n\t"   \
240          "movel  %%a2@+,  %%d1       \n\t"   \
241          "mulul  %%d2,    %%d4:%%d1  \n\t"   \
242          "addxl  %%d3,    %%d1       \n\t"   \
243          "addxl  %%d0,    %%d4       \n\t"   \
244          "addl   %%d1,    %%a3@+     \n\t"   \
245          "movel  %%a2@+,  %%d1       \n\t"   \
246          "mulul  %%d2,    %%d3:%%d1  \n\t"   \
247          "addxl  %%d4,    %%d1       \n\t"   \
248          "addxl  %%d0,    %%d3       \n\t"   \
249          "addl   %%d1,    %%a3@+     \n\t"   \
250          "movel  %%a2@+,  %%d1       \n\t"   \
251          "mulul  %%d2,    %%d4:%%d1  \n\t"   \
252          "addxl  %%d3,    %%d1       \n\t"   \
253          "addxl  %%d0,    %%d4       \n\t"   \
254          "addl   %%d1,    %%a3@+     \n\t"   \
255          "movel  %%a2@+,  %%d1       \n\t"   \
256          "mulul  %%d2,    %%d3:%%d1  \n\t"   \
257          "addxl  %%d4,    %%d1       \n\t"   \
258          "addxl  %%d0,    %%d3       \n\t"   \
259          "addl   %%d1,    %%a3@+     \n\t"   \
260          "movel  %%a2@+,  %%d1       \n\t"   \
261          "mulul  %%d2,    %%d4:%%d1  \n\t"   \
262          "addxl  %%d3,    %%d1       \n\t"   \
263          "addxl  %%d0,    %%d4       \n\t"   \
264          "addl   %%d1,    %%a3@+     \n\t"   \
265          "movel  %%a2@+,  %%d1       \n\t"   \
266          "mulul  %%d2,    %%d3:%%d1  \n\t"   \
267          "addxl  %%d4,    %%d1       \n\t"   \
268          "addxl  %%d0,    %%d3       \n\t"   \
269          "addl   %%d1,    %%a3@+     \n\t"   \
270          "addxl  %%d0,    %%d3       \n\t"
271  
272  #endif /* MC68000 */
273  
274  #if defined(__powerpc64__) || defined(__ppc64__)
275  
276  #if defined(__MACH__) && defined(__APPLE__)
277  
/*
 * PowerPC 64-bit, Apple/Mach-O register syntax: opens the asm statement and
 * loads operands %3..%6 into r3, r4, r5 and r6 (s, d, c and b per the
 * constraint lists of the matching MULADDC_STOP). r3 and r4 are biased by -8
 * because MULADDC_CORE addresses limbs at offset 8 with update-form
 * instructions. NOTE(review): the addic with 0 presumably clears the carry
 * bit before the first adde - confirm.
 */
278  #define MULADDC_INIT                        \
279      asm(                                    \
280          "ld     r3, %3              \n\t"   \
281          "ld     r4, %4              \n\t"   \
282          "ld     r5, %5              \n\t"   \
283          "ld     r6, %6              \n\t"   \
284          "addi   r3, r3, -8          \n\t"   \
285          "addi   r4, r4, -8          \n\t"   \
286          "addic  r5, r5,  0          \n\t"
287  
/*
 * PowerPC 64-bit, Apple/Mach-O register syntax: one 64-bit limb step.
 * ldu loads the next source limb and advances r3; mulld/mulhdu form the low
 * and high halves of limb * r6; the carry r5 and the destination limb at
 * 8(r4) are added to the low half; stdu stores the result and advances r4.
 * The high half plus carry becomes the new r5.
 */
288  #define MULADDC_CORE                        \
289          "ldu    r7, 8(r3)           \n\t"   \
290          "mulld  r8, r7, r6          \n\t"   \
291          "mulhdu r9, r7, r6          \n\t"   \
292          "adde   r8, r8, r5          \n\t"   \
293          "ld     r7, 8(r4)           \n\t"   \
294          "addze  r5, r9              \n\t"   \
295          "addc   r8, r8, r7          \n\t"   \
296          "stdu   r8, 8(r4)           \n\t"
297  
/*
 * PowerPC 64-bit, Apple/Mach-O register syntax: closes the asm statement
 * opened by MULADDC_INIT.
 *
 * Folds the pending carry into r5, undoes the -8 bias applied to r3 and r4
 * in MULADDC_INIT, and stores r5, r4 and r3 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * r3 (s) and updates memory through r4 (d), and no operand describes those
 * accesses.
 */
#define MULADDC_STOP                        \
        "addze  r5, r5              \n\t"   \
        "addi   r4, r4, 8           \n\t"   \
        "addi   r3, r3, 8           \n\t"   \
        "std    r5, %0              \n\t"   \
        "std    r4, %1              \n\t"   \
        "std    r3, %2              \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
          "memory"                                  \
    );
309  
310  
311  #else /* __MACH__ && __APPLE__ */
312  
/*
 * PowerPC 64-bit, %r-prefixed register syntax: opens the asm statement and
 * loads operands %3..%6 into r3, r4, r5 and r6 (s, d, c and b per the
 * constraint lists of the matching MULADDC_STOP). r3 and r4 are biased by -8
 * because MULADDC_CORE addresses limbs at offset 8 with update-form
 * instructions. NOTE(review): the addic with 0 presumably clears the carry
 * bit before the first adde - confirm.
 */
313  #define MULADDC_INIT                        \
314      asm(                                    \
315          "ld     %%r3, %3            \n\t"   \
316          "ld     %%r4, %4            \n\t"   \
317          "ld     %%r5, %5            \n\t"   \
318          "ld     %%r6, %6            \n\t"   \
319          "addi   %%r3, %%r3, -8      \n\t"   \
320          "addi   %%r4, %%r4, -8      \n\t"   \
321          "addic  %%r5, %%r5,  0      \n\t"
322  
/*
 * PowerPC 64-bit, %r-prefixed register syntax: one 64-bit limb step.
 * ldu loads the next source limb and advances r3; mulld/mulhdu form the low
 * and high halves of limb * r6; the carry r5 and the destination limb at
 * 8(r4) are added to the low half; stdu stores the result and advances r4.
 * The high half plus carry becomes the new r5.
 */
323  #define MULADDC_CORE                        \
324          "ldu    %%r7, 8(%%r3)       \n\t"   \
325          "mulld  %%r8, %%r7, %%r6    \n\t"   \
326          "mulhdu %%r9, %%r7, %%r6    \n\t"   \
327          "adde   %%r8, %%r8, %%r5    \n\t"   \
328          "ld     %%r7, 8(%%r4)       \n\t"   \
329          "addze  %%r5, %%r9          \n\t"   \
330          "addc   %%r8, %%r8, %%r7    \n\t"   \
331          "stdu   %%r8, 8(%%r4)       \n\t"
332  
/*
 * PowerPC 64-bit, %r-prefixed register syntax: closes the asm statement
 * opened by MULADDC_INIT.
 *
 * Folds the pending carry into r5, undoes the -8 bias applied to r3 and r4
 * in MULADDC_INIT, and stores r5, r4 and r3 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * r3 (s) and updates memory through r4 (d), and no operand describes those
 * accesses.
 */
#define MULADDC_STOP                        \
        "addze  %%r5, %%r5          \n\t"   \
        "addi   %%r4, %%r4, 8       \n\t"   \
        "addi   %%r3, %%r3, 8       \n\t"   \
        "std    %%r5, %0            \n\t"   \
        "std    %%r4, %1            \n\t"   \
        "std    %%r3, %2            \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
          "memory"                                  \
    );
344  
345  #endif /* __MACH__ && __APPLE__ */
346  
347  #elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32  */
348  
349  #if defined(__MACH__) && defined(__APPLE__)
350  
/*
 * PowerPC 32-bit, Apple/Mach-O register syntax: opens the asm statement and
 * loads operands %3..%6 into r3, r4, r5 and r6 (s, d, c and b per the
 * constraint lists of the matching MULADDC_STOP). r3 and r4 are biased by -4
 * because MULADDC_CORE addresses limbs at offset 4 with update-form
 * instructions. NOTE(review): the addic with 0 presumably clears the carry
 * bit before the first adde - confirm.
 */
351  #define MULADDC_INIT                    \
352      asm(                                \
353          "lwz    r3, %3          \n\t"   \
354          "lwz    r4, %4          \n\t"   \
355          "lwz    r5, %5          \n\t"   \
356          "lwz    r6, %6          \n\t"   \
357          "addi   r3, r3, -4      \n\t"   \
358          "addi   r4, r4, -4      \n\t"   \
359          "addic  r5, r5,  0      \n\t"
360  
/*
 * PowerPC 32-bit, Apple/Mach-O register syntax: one 32-bit limb step.
 * lwzu loads the next source limb and advances r3; mullw/mulhwu form the low
 * and high halves of limb * r6; the carry r5 and the destination limb at
 * 4(r4) are added to the low half; stwu stores the result and advances r4.
 * The high half plus carry becomes the new r5.
 */
361  #define MULADDC_CORE                    \
362          "lwzu   r7, 4(r3)       \n\t"   \
363          "mullw  r8, r7, r6      \n\t"   \
364          "mulhwu r9, r7, r6      \n\t"   \
365          "adde   r8, r8, r5      \n\t"   \
366          "lwz    r7, 4(r4)       \n\t"   \
367          "addze  r5, r9          \n\t"   \
368          "addc   r8, r8, r7      \n\t"   \
369          "stwu   r8, 4(r4)       \n\t"
370  
/*
 * PowerPC 32-bit, Apple/Mach-O register syntax: closes the asm statement
 * opened by MULADDC_INIT.
 *
 * Folds the pending carry into r5, undoes the -4 bias applied to r3 and r4
 * in MULADDC_INIT, and stores r5, r4 and r3 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * r3 (s) and updates memory through r4 (d), and no operand describes those
 * accesses.
 */
#define MULADDC_STOP                    \
        "addze  r5, r5          \n\t"   \
        "addi   r4, r4, 4       \n\t"   \
        "addi   r3, r3, 4       \n\t"   \
        "stw    r5, %0          \n\t"   \
        "stw    r4, %1          \n\t"   \
        "stw    r3, %2          \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
          "memory"                                  \
    );
382  
383  #else /* __MACH__ && __APPLE__ */
384  
/*
 * PowerPC 32-bit, %r-prefixed register syntax: opens the asm statement and
 * loads operands %3..%6 into r3, r4, r5 and r6 (s, d, c and b per the
 * constraint lists of the matching MULADDC_STOP). r3 and r4 are biased by -4
 * because MULADDC_CORE addresses limbs at offset 4 with update-form
 * instructions. NOTE(review): the addic with 0 presumably clears the carry
 * bit before the first adde - confirm.
 */
385  #define MULADDC_INIT                        \
386      asm(                                    \
387          "lwz    %%r3, %3            \n\t"   \
388          "lwz    %%r4, %4            \n\t"   \
389          "lwz    %%r5, %5            \n\t"   \
390          "lwz    %%r6, %6            \n\t"   \
391          "addi   %%r3, %%r3, -4      \n\t"   \
392          "addi   %%r4, %%r4, -4      \n\t"   \
393          "addic  %%r5, %%r5,  0      \n\t"
394  
/*
 * PowerPC 32-bit, %r-prefixed register syntax: one 32-bit limb step.
 * lwzu loads the next source limb and advances r3; mullw/mulhwu form the low
 * and high halves of limb * r6; the carry r5 and the destination limb at
 * 4(r4) are added to the low half; stwu stores the result and advances r4.
 * The high half plus carry becomes the new r5.
 */
395  #define MULADDC_CORE                        \
396          "lwzu   %%r7, 4(%%r3)       \n\t"   \
397          "mullw  %%r8, %%r7, %%r6    \n\t"   \
398          "mulhwu %%r9, %%r7, %%r6    \n\t"   \
399          "adde   %%r8, %%r8, %%r5    \n\t"   \
400          "lwz    %%r7, 4(%%r4)       \n\t"   \
401          "addze  %%r5, %%r9          \n\t"   \
402          "addc   %%r8, %%r8, %%r7    \n\t"   \
403          "stwu   %%r8, 4(%%r4)       \n\t"
404  
/*
 * PowerPC 32-bit, %r-prefixed register syntax: closes the asm statement
 * opened by MULADDC_INIT.
 *
 * Folds the pending carry into r5, undoes the -4 bias applied to r3 and r4
 * in MULADDC_INIT, and stores r5, r4 and r3 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * r3 (s) and updates memory through r4 (d), and no operand describes those
 * accesses.
 */
#define MULADDC_STOP                        \
        "addze  %%r5, %%r5          \n\t"   \
        "addi   %%r4, %%r4, 4       \n\t"   \
        "addi   %%r3, %%r3, 4       \n\t"   \
        "stw    %%r5, %0            \n\t"   \
        "stw    %%r4, %1            \n\t"   \
        "stw    %%r3, %2            \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
          "memory"                                  \
    );
416  
417  #endif /* __MACH__ && __APPLE__ */
418  
419  #endif /* PPC32 */
420  
421  /*
422   * The Sparc(64) assembly is reported to be broken.
423   * Disable it for now, until we're able to fix it.
424   */
425  #if 0 && defined(__sparc__)
426  #if defined(__sparc64__)
427  
/*
 * SPARC64 (currently compiled out by the enclosing "#if 0"): opens the asm
 * statement and loads operands %3..%6 into %o0, %o1, %o2 and %o3 (s, d, c
 * and b per the constraint lists of the matching MULADDC_STOP). The two
 * pointers are loaded with ldx, c and b with ld.
 */
428  #define MULADDC_INIT                                    \
429      asm(                                                \
430                  "ldx     %3, %%o0               \n\t"   \
431                  "ldx     %4, %%o1               \n\t"   \
432                  "ld      %5, %%o2               \n\t"   \
433                  "ld      %6, %%o3               \n\t"
434  
/*
 * SPARC64 (currently compiled out by the enclosing "#if 0"): one limb step.
 * Loads a source word via %o0 and the destination word via %o1, multiplies
 * the source word by %o3 with umul (high half read from %y into %g1), adds
 * the carry %o2 and the destination word, stores the result via %o1 and
 * leaves the new carry in %o2. Both pointers advance by 4 bytes.
 */
435  #define MULADDC_CORE                                    \
436                  "ld      [%%o0], %%o4           \n\t"   \
437                  "inc     4, %%o0                \n\t"   \
438                  "ld      [%%o1], %%o5           \n\t"   \
439                  "umul    %%o3, %%o4, %%o4       \n\t"   \
440                  "addcc   %%o4, %%o2, %%o4       \n\t"   \
441                  "rd      %%y, %%g1              \n\t"   \
442                  "addx    %%g1, 0, %%g1          \n\t"   \
443                  "addcc   %%o4, %%o5, %%o4       \n\t"   \
444                  "st      %%o4, [%%o1]           \n\t"   \
445                  "addx    %%g1, 0, %%o2          \n\t"   \
446                  "inc     4, %%o1                \n\t"
447  
/*
 * SPARC64 (currently compiled out by the enclosing "#if 0"): closes the asm
 * statement opened by MULADDC_INIT.
 *
 * Stores %o2, %o1 and %o0 back to c, d and s (st for c, stx for the two
 * pointers).
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * %o0 (s) and updates memory through %o1 (d), and no operand describes those
 * accesses. This addresses only that omission; it is not known whether it is
 * the reason the SPARC code was reported broken.
 */
#define MULADDC_STOP                                    \
                "st      %%o2, %0               \n\t"   \
                "stx     %%o1, %1               \n\t"   \
                "stx     %%o0, %2               \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)          \
        : "m" (s), "m" (d), "m" (c), "m" (b)    \
        : "g1", "o0", "o1", "o2", "o3", "o4",   \
          "o5", "memory"                        \
        );
457  
458  #else /* __sparc64__ */
459  
/*
 * SPARC 32-bit (currently compiled out by the enclosing "#if 0"): opens the
 * asm statement and loads operands %3..%6 into %o0, %o1, %o2 and %o3
 * (s, d, c and b per the constraint lists of the matching MULADDC_STOP).
 */
460  #define MULADDC_INIT                                    \
461      asm(                                                \
462                  "ld      %3, %%o0               \n\t"   \
463                  "ld      %4, %%o1               \n\t"   \
464                  "ld      %5, %%o2               \n\t"   \
465                  "ld      %6, %%o3               \n\t"
466  
/*
 * SPARC 32-bit (currently compiled out by the enclosing "#if 0"): one limb
 * step. Loads a source word via %o0 and the destination word via %o1,
 * multiplies the source word by %o3 with umul (high half read from %y into
 * %g1), adds the carry %o2 and the destination word, stores the result via
 * %o1 and leaves the new carry in %o2. Both pointers advance by 4 bytes.
 */
467  #define MULADDC_CORE                                    \
468                  "ld      [%%o0], %%o4           \n\t"   \
469                  "inc     4, %%o0                \n\t"   \
470                  "ld      [%%o1], %%o5           \n\t"   \
471                  "umul    %%o3, %%o4, %%o4       \n\t"   \
472                  "addcc   %%o4, %%o2, %%o4       \n\t"   \
473                  "rd      %%y, %%g1              \n\t"   \
474                  "addx    %%g1, 0, %%g1          \n\t"   \
475                  "addcc   %%o4, %%o5, %%o4       \n\t"   \
476                  "st      %%o4, [%%o1]           \n\t"   \
477                  "addx    %%g1, 0, %%o2          \n\t"   \
478                  "inc     4, %%o1                \n\t"
479  
/*
 * SPARC 32-bit (currently compiled out by the enclosing "#if 0"): closes the
 * asm statement opened by MULADDC_INIT.
 *
 * Stores %o2, %o1 and %o0 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * %o0 (s) and updates memory through %o1 (d), and no operand describes those
 * accesses. This addresses only that omission; it is not known whether it is
 * the reason the SPARC code was reported broken.
 */
#define MULADDC_STOP                                    \
                "st      %%o2, %0               \n\t"   \
                "st      %%o1, %1               \n\t"   \
                "st      %%o0, %2               \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)          \
        : "m" (s), "m" (d), "m" (c), "m" (b)    \
        : "g1", "o0", "o1", "o2", "o3", "o4",   \
          "o5", "memory"                        \
        );
489  
490  #endif /* __sparc64__ */
491  #endif /* __sparc__ */
492  
493  #if defined(__microblaze__) || defined(microblaze)
494  
/*
 * MicroBlaze: opens the asm statement and loads operands %3..%6 into r3, r4,
 * r5 and r6 (s, d, c and b per the constraint lists of the matching
 * MULADDC_STOP). The multiplier is then split into 16-bit halves: r7 holds
 * its low 16 bits and r6 its high 16 bits.
 */
495  #define MULADDC_INIT                    \
496      asm(                                \
497          "lwi   r3,   %3         \n\t"   \
498          "lwi   r4,   %4         \n\t"   \
499          "lwi   r5,   %5         \n\t"   \
500          "lwi   r6,   %6         \n\t"   \
501          "andi  r7,   r6, 0xffff \n\t"   \
502          "bsrli r6,   r6, 16     \n\t"
503  
/*
 * MicroBlaze: one 32-bit limb step built from 16-bit pieces. The source limb
 * is read as two halfwords via r3 (advancing r3 by 4 in total), four
 * multiplies against the multiplier halves in r6/r7 form the partial
 * products, and these are combined into a low word in r12 and a high word in
 * r13. The destination word at r4 and the carry r5 are added to the low
 * word, the result is stored via r4 (advanced by 4), and the new carry is
 * left in r5.
 */
504  #define MULADDC_CORE                    \
505          "lhui  r8,   r3,   0    \n\t"   \
506          "addi  r3,   r3,   2    \n\t"   \
507          "lhui  r9,   r3,   0    \n\t"   \
508          "addi  r3,   r3,   2    \n\t"   \
509          "mul   r10,  r9,  r6    \n\t"   \
510          "mul   r11,  r8,  r7    \n\t"   \
511          "mul   r12,  r9,  r7    \n\t"   \
512          "mul   r13,  r8,  r6    \n\t"   \
513          "bsrli  r8, r10,  16    \n\t"   \
514          "bsrli  r9, r11,  16    \n\t"   \
515          "add   r13, r13,  r8    \n\t"   \
516          "add   r13, r13,  r9    \n\t"   \
517          "bslli r10, r10,  16    \n\t"   \
518          "bslli r11, r11,  16    \n\t"   \
519          "add   r12, r12, r10    \n\t"   \
520          "addc  r13, r13,  r0    \n\t"   \
521          "add   r12, r12, r11    \n\t"   \
522          "addc  r13, r13,  r0    \n\t"   \
523          "lwi   r10,  r4,   0    \n\t"   \
524          "add   r12, r12, r10    \n\t"   \
525          "addc  r13, r13,  r0    \n\t"   \
526          "add   r12, r12,  r5    \n\t"   \
527          "addc   r5, r13,  r0    \n\t"   \
528          "swi   r12,  r4,   0    \n\t"   \
529          "addi   r4,  r4,   4    \n\t"
530  
/*
 * MicroBlaze: closes the asm statement opened by MULADDC_INIT.
 *
 * Stores r5, r4 and r3 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * r3 (s) and updates memory through r4 (d), and no operand describes those
 * accesses.
 */
#define MULADDC_STOP                    \
        "swi   r5,   %0         \n\t"   \
        "swi   r4,   %1         \n\t"   \
        "swi   r3,   %2         \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)              \
        : "m" (s), "m" (d), "m" (c), "m" (b)        \
        : "r3", "r4", "r5", "r6", "r7", "r8",       \
          "r9", "r10", "r11", "r12", "r13",         \
          "memory"                                  \
    );
540  
541  #endif /* MicroBlaze */
542  
543  #if defined(__tricore__)
544  
/*
 * TriCore: opens the asm statement and loads operands %3..%6 into %a2, %a3,
 * %d4 and %d1 (s, d, c and b per the constraint lists of the matching
 * MULADDC_STOP), then zeroes %d5. MULADDC_CORE uses %e4 as the 64-bit
 * addend of madd.u.
 */
545  #define MULADDC_INIT                            \
546      asm(                                        \
547          "ld.a   %%a2, %3                \n\t"   \
548          "ld.a   %%a3, %4                \n\t"   \
549          "ld.w   %%d4, %5                \n\t"   \
550          "ld.w   %%d1, %6                \n\t"   \
551          "xor    %%d5, %%d5              \n\t"
552  
/*
 * TriCore: one 32-bit limb step. Loads the next source word via %a2 with
 * post-increment, uses madd.u to form %e2 = %e4 + word * %d1, adds the
 * destination word at [%a3] into %d2 with the carry propagated into %d3,
 * moves %d3 to %d4 as the new carry and stores %d2 via %a3 with
 * post-increment.
 */
553  #define MULADDC_CORE                            \
554          "ld.w   %%d0,   [%%a2+]         \n\t"   \
555          "madd.u %%e2, %%e4, %%d0, %%d1  \n\t"   \
556          "ld.w   %%d0,   [%%a3]          \n\t"   \
557          "addx   %%d2,    %%d2,  %%d0    \n\t"   \
558          "addc   %%d3,    %%d3,    0     \n\t"   \
559          "mov    %%d4,    %%d3           \n\t"   \
560          "st.w  [%%a3+],  %%d2           \n\t"
561  
/*
 * TriCore: closes the asm statement opened by MULADDC_INIT.
 *
 * Stores %d4, %a3 and %a2 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * Clobbers:
 *  - "d5" is listed because MULADDC_INIT overwrites %d5 (xor %d5, %d5) and
 *    the previous list named only "d4".
 *  - "memory" is listed because the asm body reads memory through %a2 (s)
 *    and updates memory through %a3 (d), and no operand describes those
 *    accesses.
 */
#define MULADDC_STOP                            \
        "st.w   %0, %%d4                \n\t"   \
        "st.a   %1, %%a3                \n\t"   \
        "st.a   %2, %%a2                \n\t"   \
        : "=m" (c), "=m" (d), "=m" (s)          \
        : "m" (s), "m" (d), "m" (c), "m" (b)    \
        : "d0", "d1", "e2", "d4", "d5", "a2",   \
          "a3", "memory"                        \
    );
570  
571  #endif /* TriCore */
572  
573  /*
574   * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about
575   * our use of r7 below, unless -fomit-frame-pointer is passed.
576   *
577   * On the other hand, -fomit-frame-pointer is implied by any -Ox options with
578   * x != 0, which we can detect using __OPTIMIZE__ (which is also defined by
579   * clang and armcc5 under the same conditions).
580   *
581   * So, only use the optimized assembly below for optimized builds, which avoids
582   * the build error and is pretty reasonable anyway.
583   */
/* When MULADDC_CANNOT_USE_R7 is defined, the ARM section below is skipped (its #if tests this macro). */
584  #if defined(__GNUC__) && !defined(__OPTIMIZE__)
585  #define MULADDC_CANNOT_USE_R7
586  #endif
587  
588  #if defined(__arm__) && !defined(MULADDC_CANNOT_USE_R7)
589  
590  #if defined(__thumb__) && !defined(__thumb2__)
591  
/*
 * ARM Thumb-1: opens the asm statement and loads operands %3..%6 into r0,
 * r1, r2 and r3 (s, d, c and b per the constraint lists of the matching
 * MULADDC_STOP). The multiplier is split into 16-bit halves kept in high
 * registers: r9 holds b >> 16 and r8 holds the low 16 bits of b.
 */
592  #define MULADDC_INIT                                    \
593      asm(                                                \
594              "ldr    r0, %3                      \n\t"   \
595              "ldr    r1, %4                      \n\t"   \
596              "ldr    r2, %5                      \n\t"   \
597              "ldr    r3, %6                      \n\t"   \
598              "lsr    r7, r3, #16                 \n\t"   \
599              "mov    r9, r7                      \n\t"   \
600              "lsl    r7, r3, #16                 \n\t"   \
601              "lsr    r7, r7, #16                 \n\t"   \
602              "mov    r8, r7                      \n\t"
603  
/*
 * ARM Thumb-1: one 32-bit limb step built from 16-bit pieces. Loads the next
 * source limb via r0 (ldmia with write-back), splits it into halves in r6
 * (low) and r7 (high), and forms four partial products with the multiplier
 * halves held in r8/r9. The partial products, the carry r2 and the
 * destination word at [r1] are summed into a low word r4 and a high word;
 * r4 is stored via r1 (stmia with write-back) and the new carry is left in
 * r2.
 */
604  #define MULADDC_CORE                                    \
605              "ldmia  r0!, {r6}                   \n\t"   \
606              "lsr    r7, r6, #16                 \n\t"   \
607              "lsl    r6, r6, #16                 \n\t"   \
608              "lsr    r6, r6, #16                 \n\t"   \
609              "mov    r4, r8                      \n\t"   \
610              "mul    r4, r6                      \n\t"   \
611              "mov    r3, r9                      \n\t"   \
612              "mul    r6, r3                      \n\t"   \
613              "mov    r5, r9                      \n\t"   \
614              "mul    r5, r7                      \n\t"   \
615              "mov    r3, r8                      \n\t"   \
616              "mul    r7, r3                      \n\t"   \
617              "lsr    r3, r6, #16                 \n\t"   \
618              "add    r5, r5, r3                  \n\t"   \
619              "lsr    r3, r7, #16                 \n\t"   \
620              "add    r5, r5, r3                  \n\t"   \
621              "add    r4, r4, r2                  \n\t"   \
622              "mov    r2, #0                      \n\t"   \
623              "adc    r5, r2                      \n\t"   \
624              "lsl    r3, r6, #16                 \n\t"   \
625              "add    r4, r4, r3                  \n\t"   \
626              "adc    r5, r2                      \n\t"   \
627              "lsl    r3, r7, #16                 \n\t"   \
628              "add    r4, r4, r3                  \n\t"   \
629              "adc    r5, r2                      \n\t"   \
630              "ldr    r3, [r1]                    \n\t"   \
631              "add    r4, r4, r3                  \n\t"   \
632              "adc    r2, r5                      \n\t"   \
633              "stmia  r1!, {r4}                   \n\t"
634  
/*
 * ARM Thumb-1: closes the asm statement opened by MULADDC_INIT.
 *
 * Stores r2, r1 and r0 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * r0 (s) and updates memory through r1 (d), and no operand describes those
 * accesses. The ARMv6 DSP variant in this file already lists it.
 */
#define MULADDC_STOP                                    \
            "str    r2, %0                      \n\t"   \
            "str    r1, %1                      \n\t"   \
            "str    r0, %2                      \n\t"   \
         : "=m" (c),  "=m" (d), "=m" (s)        \
         : "m" (s), "m" (d), "m" (c), "m" (b)   \
         : "r0", "r1", "r2", "r3", "r4", "r5",  \
           "r6", "r7", "r8", "r9", "cc",        \
           "memory"                             \
         );
644  
645  #elif (__ARM_ARCH >= 6) && \
646      defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)
647  
/*
 * ARM with DSP extension (__ARM_ARCH >= 6): opens the asm statement. No
 * setup code is needed because the matching MULADDC_STOP passes every
 * operand in a register.
 */
648  #define MULADDC_INIT                            \
649      asm(
650  
/*
 * ARM with DSP extension: one 32-bit limb step. With the operand order of
 * the matching MULADDC_STOP, %0 is s, %1 is d, %2 is c and %3 is b. Loads
 * the source limb via %0 with post-increment and the destination limb at
 * [%1] into r0 and r1, then umaal forms b * limb + r1 + c with the low word
 * in r1 and the high word in %2, and r1 is stored via %1 with
 * post-increment.
 */
651  #define MULADDC_CORE                            \
652              "ldr    r0, [%0], #4        \n\t"   \
653              "ldr    r1, [%1]            \n\t"   \
654              "umaal  r1, %2, %3, r0      \n\t"   \
655              "str    r1, [%1], #4        \n\t"
656  
/*
 * ARM with DSP extension: closes the asm statement opened by MULADDC_INIT.
 * Outputs %0..%2 are s, d and c in registers; input %3 is b, and the inputs
 * "0", "1", "2" tie the incoming s, d and c to the same registers as the
 * outputs. r0 and r1 are scratch; "memory" covers the loads and stores made
 * through the s and d pointers.
 */
657  #define MULADDC_STOP                            \
658           : "=r" (s),  "=r" (d), "=r" (c)        \
659           : "r" (b), "0" (s), "1" (d), "2" (c)   \
660           : "r0", "r1", "memory"                 \
661           );
662  
663  #else
664  
/*
 * ARM (generic): opens the asm statement and loads operands %3..%6 into r0,
 * r1, r2 and r3 (s, d, c and b per the constraint lists of the matching
 * MULADDC_STOP).
 */
665  #define MULADDC_INIT                                    \
666      asm(                                                \
667              "ldr    r0, %3                      \n\t"   \
668              "ldr    r1, %4                      \n\t"   \
669              "ldr    r2, %5                      \n\t"   \
670              "ldr    r3, %6                      \n\t"
671  
/*
 * ARM (generic): one 32-bit limb step. Loads the source limb via r0 with
 * post-increment and the destination limb at [r1], uses umlal to add
 * r3 * limb to the 64-bit value r5:r2 (r5 zeroed first, r2 holding the
 * carry), adds the destination limb to the low word, stores the sum via r1
 * with post-increment and leaves the new carry in r2.
 */
672  #define MULADDC_CORE                                    \
673              "ldr    r4, [r0], #4                \n\t"   \
674              "mov    r5, #0                      \n\t"   \
675              "ldr    r6, [r1]                    \n\t"   \
676              "umlal  r2, r5, r3, r4              \n\t"   \
677              "adds   r7, r6, r2                  \n\t"   \
678              "adc    r2, r5, #0                  \n\t"   \
679              "str    r7, [r1], #4                \n\t"
680  
/*
 * ARM (generic): closes the asm statement opened by MULADDC_INIT.
 *
 * Stores r2, r1 and r0 back to c, d and s.
 *
 * Operands: %0..%2 are the outputs c, d, s; %3..%6 are the inputs
 * s, d, c, b.
 *
 * "memory" is listed as a clobber because the asm body reads memory through
 * r0 (s) and updates memory through r1 (d), and no operand describes those
 * accesses. The ARMv6 DSP variant in this file already lists it.
 */
#define MULADDC_STOP                                    \
            "str    r2, %0                      \n\t"   \
            "str    r1, %1                      \n\t"   \
            "str    r0, %2                      \n\t"   \
         : "=m" (c),  "=m" (d), "=m" (s)        \
         : "m" (s), "m" (d), "m" (c), "m" (b)   \
         : "r0", "r1", "r2", "r3", "r4", "r5",  \
           "r6", "r7", "cc", "memory"           \
         );
690  
691  #endif /* Thumb */
692  
693  #endif /* ARMv3 */
694  
695  #if defined(__alpha__)
696  
/*
 * Alpha: opens the asm statement and loads operands %3..%6 into $1, $2, $3
 * and $4 (s, d, c and b per the constraint lists of the matching
 * MULADDC_STOP).
 */
697  #define MULADDC_INIT                    \
698      asm(                                \
699          "ldq    $1, %3          \n\t"   \
700          "ldq    $2, %4          \n\t"   \
701          "ldq    $3, %5          \n\t"   \
702          "ldq    $4, %6          \n\t"
703  
/*
 * Alpha: one 64-bit limb step. Loads the source limb via $1 (advanced by 8),
 * forms the low ($7) and high ($6) halves of limb * $4, adds the carry $3
 * and the destination limb at 0($2) to the low half, using cmpult to detect
 * each carry-out, stores the result via $2 (advanced by 8), and sums the
 * high half and the two carry-outs into the new carry $3.
 */
704  #define MULADDC_CORE                    \
705          "ldq    $6,  0($1)      \n\t"   \
706          "addq   $1,  8, $1      \n\t"   \
707          "mulq   $6, $4, $7      \n\t"   \
708          "umulh  $6, $4, $6      \n\t"   \
709          "addq   $7, $3, $7      \n\t"   \
710          "cmpult $7, $3, $3      \n\t"   \
711          "ldq    $5,  0($2)      \n\t"   \
712          "addq   $7, $5, $7      \n\t"   \
713          "cmpult $7, $5, $5      \n\t"   \
714          "stq    $7,  0($2)      \n\t"   \
715          "addq   $2,  8, $2      \n\t"   \
716          "addq   $6, $3, $3      \n\t"   \
717          "addq   $5, $3, $3      \n\t"
718  
/*
 * Alpha: finish the inline-asm statement.
 *
 * Writes the working registers back and closes the statement:
 *   $3 -> %0 (c), $2 -> %1 (d), $1 -> %2 (s)
 * Outputs are %0..%2 (c, d, s); inputs are %3..%6 (s, d, c, b), all as
 * memory operands. Registers $1-$7 are declared clobbered.
 *
 * NOTE(review): MULADDC_CORE stores through $2, a location that is not one
 * of the listed operands, and no "memory" clobber is declared - confirm
 * whether one is required.
 */
719  #define MULADDC_STOP                                    \
720          "stq    $3, %0          \n\t"   \
721          "stq    $2, %1          \n\t"   \
722          "stq    $1, %2          \n\t"   \
723          : "=m" (c), "=m" (d), "=m" (s)              \
724          : "m" (s), "m" (d), "m" (c), "m" (b)        \
725          : "$1", "$2", "$3", "$4", "$5", "$6", "$7"  \
726      );
727  #endif /* Alpha */
728  
729  #if defined(__mips__) && !defined(__mips64)
730  
/*
 * MIPS32: open the inline-asm statement and load the working registers.
 *
 * Operand numbers refer to the lists in the MIPS MULADDC_STOP:
 *   $10 = %3 (s), $11 = %4 (d), $12 = %5 (c), $13 = %6 (b)
 *
 * The statement is deliberately left open: MULADDC_CORE appends
 * instructions and MULADDC_STOP adds the operands and the closing ");".
 */
731  #define MULADDC_INIT                    \
732      asm(                                \
733          "lw     $10, %3         \n\t"   \
734          "lw     $11, %4         \n\t"   \
735          "lw     $12, %5         \n\t"   \
736          "lw     $13, %6         \n\t"
737  
/*
 * MIPS32: one multiply-accumulate step on a 32-bit word.
 *
 * With $10 = s, $11 = d, $12 = carry and $13 = b (loaded by MULADDC_INIT):
 *
 *   lw    $14, 0($10)     $14 = word at s
 *   multu $13, $14        HI:LO = b * $14 (unsigned)
 *   addi  $10, $10, 4     s += 4 bytes
 *   mflo  $14             $14 = low  half of the product
 *   mfhi  $9              $9  = high half of the product
 *   addu  $14, $12, $14   low += carry
 *   lw    $15, 0($11)     $15 = word at d
 *   sltu  $12, $14, $12   $12 = 1 if low wrapped (low < old carry)
 *   addu  $15, $14, $15   $15 = low + word at d
 *   sltu  $14, $15, $14   $14 = 1 if that addition wrapped
 *   addu  $12, $12, $9    new carry = first wrap bit + high
 *   sw    $15, 0($11)     store the sum at d
 *   addu  $12, $12, $14               + second wrap bit
 *   addi  $11, $11, 4     d += 4 bytes
 *
 * NOTE(review): addi is the add-immediate form that traps on signed
 * overflow; addiu may be what is wanted for pointer arithmetic - confirm.
 *
 * Expands to string literals only; valid only between the MIPS
 * MULADDC_INIT and MULADDC_STOP.
 */
738  #define MULADDC_CORE                    \
739          "lw     $14, 0($10)     \n\t"   \
740          "multu  $13, $14        \n\t"   \
741          "addi   $10, $10, 4     \n\t"   \
742          "mflo   $14             \n\t"   \
743          "mfhi   $9              \n\t"   \
744          "addu   $14, $12, $14   \n\t"   \
745          "lw     $15, 0($11)     \n\t"   \
746          "sltu   $12, $14, $12   \n\t"   \
747          "addu   $15, $14, $15   \n\t"   \
748          "sltu   $14, $15, $14   \n\t"   \
749          "addu   $12, $12, $9    \n\t"   \
750          "sw     $15, 0($11)     \n\t"   \
751          "addu   $12, $12, $14   \n\t"   \
752          "addi   $11, $11, 4     \n\t"
753  
/*
 * MIPS32: finish the inline-asm statement.
 *
 * Writes the working registers back and closes the statement:
 *   $12 -> %0 (c), $11 -> %1 (d), $10 -> %2 (s)
 * Outputs are %0..%2 (c, d, s); inputs are %3..%6 (s, d, c, b), all as
 * memory operands. Registers $9-$15 and the lo/hi multiply registers are
 * declared clobbered.
 *
 * NOTE(review): MULADDC_CORE stores through $11, a location that is not one
 * of the listed operands, and no "memory" clobber is declared - confirm
 * whether one is required.
 */
754  #define MULADDC_STOP                    \
755          "sw     $12, %0         \n\t"   \
756          "sw     $11, %1         \n\t"   \
757          "sw     $10, %2         \n\t"   \
758          : "=m" (c), "=m" (d), "=m" (s)                      \
759          : "m" (s), "m" (d), "m" (c), "m" (b)                \
760          : "$9", "$10", "$11", "$12", "$13", "$14", "$15", "lo", "hi" \
761      );
762  
763  #endif /* MIPS */
764  #endif /* GNUC */
765  
766  #if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
767  
/*
 * 32-bit x86, MSVC / Watcom inline assembler: load the working registers
 * from the C variables named s, d, c and b in the expanding scope:
 *   esi = s, edi = d, ecx = c, ebx = b
 * Each instruction is its own __asm statement, so no terminator is needed
 * between this macro and the MULADDC_CORE / MULADDC_STOP that follow it.
 */
768  #define MULADDC_INIT                            \
769      __asm   mov     esi, s                      \
770      __asm   mov     edi, d                      \
771      __asm   mov     ecx, c                      \
772      __asm   mov     ebx, b
773  
/*
 * 32-bit x86, MSVC / Watcom inline assembler: one multiply-accumulate step.
 *
 * With esi = s, edi = d, ecx = carry and ebx = b (set by MULADDC_INIT):
 *
 *   lodsd             eax = dword at [esi], esi advances to the next dword
 *   mul   ebx         edx:eax = eax * ebx (unsigned)
 *   add   eax, ecx    low += carry
 *   adc   edx, 0      propagate into the high half
 *   add   eax, [edi]  low += dword at [edi]
 *   adc   edx, 0      propagate into the high half
 *   mov   ecx, edx    high half becomes the carry for the next step
 *   stosd             dword at [edi] = eax, edi advances to the next dword
 *
 * NOTE(review): lodsd/stosd move forward only when the direction flag is
 * clear; this code assumes that - confirm the calling convention ensures it.
 */
774  #define MULADDC_CORE                            \
775      __asm   lodsd                               \
776      __asm   mul     ebx                         \
777      __asm   add     eax, ecx                    \
778      __asm   adc     edx, 0                      \
779      __asm   add     eax, [edi]                  \
780      __asm   adc     edx, 0                      \
781      __asm   mov     ecx, edx                    \
782      __asm   stosd
783  
784  #if defined(MBEDTLS_HAVE_SSE2)
785  
/*
 * Shorthand for the inline assembler's _emit pseudo-instruction, used below
 * to spell instructions as raw opcode bytes (one byte per EMIT).
 */
786  #define EMIT __asm _emit
787  
/*
 * 32-bit x86 with MBEDTLS_HAVE_SSE2: multiply-accumulate step written as
 * raw opcode bytes. The sequence reads eight dwords at [esi]..[esi+0x1C],
 * reads and rewrites eight dwords at [edi]..[edi+0x1C], then adds 0x20 to
 * both edi and esi ("huit" is presumably French for eight).
 *
 * Opcode legend (decoded by hand from the bytes - verify against the Intel
 * instruction-set reference before relying on it):
 *   0F 6E /r      movd    mm, r/m32
 *   0F 7E /r      movd    r/m32, mm
 *   0F F4 /r      pmuludq mm, mm
 *   0F D4 /r      paddq   mm, mm
 *   0F 73 D1 20   psrlq   mm1, 32
 *   83 C7 20      add     edi, 0x20
 *   83 C6 20      add     esi, 0x20
 *
 * The first two instructions decode as movd mm1, ecx and movd mm0, ebx
 * (carry and multiplier as set up by MULADDC_INIT); the last one decodes as
 * movd ecx, mm1, returning the carry to ecx.
 */
788  #define MULADDC_HUIT                            \
789      EMIT 0x0F  EMIT 0x6E  EMIT 0xC9             \
790      EMIT 0x0F  EMIT 0x6E  EMIT 0xC3             \
791      EMIT 0x0F  EMIT 0x6E  EMIT 0x1F             \
792      EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
793      EMIT 0x0F  EMIT 0x6E  EMIT 0x16             \
794      EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
795      EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x04  \
796      EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
797      EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x08  \
798      EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
799      EMIT 0x0F  EMIT 0x6E  EMIT 0x7E  EMIT 0x0C  \
800      EMIT 0x0F  EMIT 0xF4  EMIT 0xF8             \
801      EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
802      EMIT 0x0F  EMIT 0x6E  EMIT 0x5F  EMIT 0x04  \
803      EMIT 0x0F  EMIT 0xD4  EMIT 0xDC             \
804      EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x08  \
805      EMIT 0x0F  EMIT 0xD4  EMIT 0xEE             \
806      EMIT 0x0F  EMIT 0x6E  EMIT 0x67  EMIT 0x0C  \
807      EMIT 0x0F  EMIT 0xD4  EMIT 0xFC             \
808      EMIT 0x0F  EMIT 0x7E  EMIT 0x0F             \
809      EMIT 0x0F  EMIT 0x6E  EMIT 0x56  EMIT 0x10  \
810      EMIT 0x0F  EMIT 0xF4  EMIT 0xD0             \
811      EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
812      EMIT 0x0F  EMIT 0x6E  EMIT 0x66  EMIT 0x14  \
813      EMIT 0x0F  EMIT 0xF4  EMIT 0xE0             \
814      EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
815      EMIT 0x0F  EMIT 0x6E  EMIT 0x76  EMIT 0x18  \
816      EMIT 0x0F  EMIT 0xF4  EMIT 0xF0             \
817      EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x04  \
818      EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
819      EMIT 0x0F  EMIT 0x6E  EMIT 0x5E  EMIT 0x1C  \
820      EMIT 0x0F  EMIT 0xF4  EMIT 0xD8             \
821      EMIT 0x0F  EMIT 0xD4  EMIT 0xCD             \
822      EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x10  \
823      EMIT 0x0F  EMIT 0xD4  EMIT 0xD5             \
824      EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x08  \
825      EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
826      EMIT 0x0F  EMIT 0xD4  EMIT 0xCF             \
827      EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x14  \
828      EMIT 0x0F  EMIT 0xD4  EMIT 0xE5             \
829      EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x0C  \
830      EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
831      EMIT 0x0F  EMIT 0xD4  EMIT 0xCA             \
832      EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x18  \
833      EMIT 0x0F  EMIT 0xD4  EMIT 0xF5             \
834      EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x10  \
835      EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
836      EMIT 0x0F  EMIT 0xD4  EMIT 0xCC             \
837      EMIT 0x0F  EMIT 0x6E  EMIT 0x6F  EMIT 0x1C  \
838      EMIT 0x0F  EMIT 0xD4  EMIT 0xDD             \
839      EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x14  \
840      EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
841      EMIT 0x0F  EMIT 0xD4  EMIT 0xCE             \
842      EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x18  \
843      EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
844      EMIT 0x0F  EMIT 0xD4  EMIT 0xCB             \
845      EMIT 0x0F  EMIT 0x7E  EMIT 0x4F  EMIT 0x1C  \
846      EMIT 0x83  EMIT 0xC7  EMIT 0x20             \
847      EMIT 0x83  EMIT 0xC6  EMIT 0x20             \
848      EMIT 0x0F  EMIT 0x73  EMIT 0xD1  EMIT 0x20  \
849      EMIT 0x0F  EMIT 0x7E  EMIT 0xC9
850  
/*
 * 32-bit x86 with MBEDTLS_HAVE_SSE2: finish the sequence.
 *
 * The two emitted bytes 0x0F 0x77 encode EMMS, which releases the MMX
 * register state used by MULADDC_HUIT. The working registers are then
 * written back to the C variables: ecx -> c, edi -> d, esi -> s.
 *
 * The final line intentionally carries no line continuation, so the macro
 * ends here regardless of what follows it in the file.
 */
#define MULADDC_STOP                            \
    EMIT 0x0F  EMIT 0x77                        \
    __asm   mov     c, ecx                      \
    __asm   mov     d, edi                      \
    __asm   mov     s, esi
856  
857  #else
858  
/*
 * 32-bit x86 without SSE2: finish the sequence by writing the working
 * registers back to the C variables: ecx -> c, edi -> d, esi -> s.
 *
 * The final line intentionally carries no line continuation, so the macro
 * ends here regardless of what follows it in the file.
 */
#define MULADDC_STOP                            \
    __asm   mov     c, ecx                      \
    __asm   mov     d, edi                      \
    __asm   mov     s, esi
863  
864  #endif /* SSE2 */
865  #endif /* MSVC */
866  
867  #endif /* MBEDTLS_HAVE_ASM */
868  
869  #if !defined(MULADDC_CORE)
870  #if defined(MBEDTLS_HAVE_UDBL)
871  
/*
 * Portable C fallback for targets that provide a double-width integer type
 * (MBEDTLS_HAVE_UDBL).
 *
 * MULADDC_INIT opens a block and declares the scratch variables,
 * MULADDC_CORE may then be repeated any number of times, and MULADDC_STOP
 * closes the block. The macros operate on s, d, c and b from the expanding
 * scope.
 */
#define MULADDC_INIT                                    \
{                                                       \
    mbedtls_t_udbl   prod;                              \
    mbedtls_mpi_uint lo, hi;

/*
 * One step: computes *s * b + c + *d, stores the low word at *d, leaves the
 * high word in c, and advances both s and d by one element.
 * The carry updates are kept as comparisons added into hi rather than
 * branches. biL is used as the shift that extracts the high word
 * (presumably the bit width of mbedtls_mpi_uint - defined elsewhere).
 */
#define MULADDC_CORE                                    \
    prod = *s * (mbedtls_t_udbl) b;                     \
    s++;                                                \
    lo   = (mbedtls_mpi_uint) prod;                     \
    hi   = (mbedtls_mpi_uint)( prod >> biL );           \
    lo  += c;                                           \
    hi  += ( lo < c );                                  \
    lo  += *d;                                          \
    hi  += ( lo < *d );                                 \
    c    = hi;                                          \
    *d   = lo;                                          \
    d++;

/* Close the block opened by MULADDC_INIT. */
#define MULADDC_STOP                                    \
}
887  
888  #else
/*
 * Portable C fallback that needs no type wider than mbedtls_mpi_uint.
 *
 * MULADDC_INIT opens a block, declares the scratch variables and splits b
 * into a low part (b_lo) and a high part (b_hi) using shifts by biH
 * (presumably half the bit width of mbedtls_mpi_uint - defined elsewhere).
 * MULADDC_CORE may then be repeated, and MULADDC_STOP closes the block.
 * The macros operate on s, d, c and b from the expanding scope.
 */
#define MULADDC_INIT                                    \
{                                                       \
    mbedtls_mpi_uint s_lo, s_hi, b_lo, b_hi;            \
    mbedtls_mpi_uint acc_lo, acc_hi, mid_a, mid_b;      \
    b_hi = ( b >> biH );                                \
    b_lo = ( b << biH ) >> biH;

/*
 * One step: splits *s into halves, forms the four partial products with the
 * halves of b, and assembles them into a two-word result acc_hi:acc_lo.
 * c and *d are then added in; every addition into acc_lo is followed by a
 * comparison that adds the wrap-around bit to acc_hi. Finally the high word
 * goes to c, the low word to *d, and s and d have each advanced by one
 * element.
 */
#define MULADDC_CORE                                    \
    s_lo    = ( *s << biH ) >> biH;                     \
    s_hi    = ( *s >> biH );                            \
    s++;                                                \
    acc_lo  = s_lo * b_lo;                              \
    acc_hi  = s_hi * b_hi;                              \
    mid_a   = s_lo * b_hi;                              \
    mid_b   = s_hi * b_lo;                              \
    acc_hi += ( mid_a >> biH );                         \
    acc_hi += ( mid_b >> biH );                         \
    mid_a <<= biH;                                      \
    mid_b <<= biH;                                      \
    acc_lo += mid_a;                                    \
    acc_hi += ( acc_lo < mid_a );                       \
    acc_lo += mid_b;                                    \
    acc_hi += ( acc_lo < mid_b );                       \
    acc_lo += c;                                        \
    acc_hi += ( acc_lo < c );                           \
    acc_lo += *d;                                       \
    acc_hi += ( acc_lo < *d );                          \
    c       = acc_hi;                                   \
    *d      = acc_lo;                                   \
    d++;

/* Close the block opened by MULADDC_INIT. */
#define MULADDC_STOP                                    \
}
912  
913  #endif /* C (generic)  */
914  #endif /* C (longlong) */
915  
916  #endif /* bn_mul.h */
917