; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Address static data RIP-relative unless an operand says otherwise.
default	rel
; The size keywords that appear in front of memory operands in this file
; are defined as empty macros, so they expand to nothing when assembled.
%define XMMWORD
%define YMMWORD
%define ZMMWORD

; Optional symbol-prefixing support, pulled in only when the build
; defines BORINGSSL_PREFIX.
%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section	.text code align=64



;-----------------------------------------------------------------------
; beeu_mod_inverse_vartime(out, a, n)
;
; Modular inversion of a 4-limb (256-bit, least-significant limb first)
; value using a binary extended Euclidean iteration.  Control flow
; depends on the operand bits, so the running time is data dependent
; (hence "vartime" in the name).
;
; ABI:  Microsoft x64 on entry.  The prologue stores the caller's
;       rdi/rsi in the first two home slots above the return address
;       and moves the arguments into the System V registers, which is
;       what the body uses:
;         rdi = out  four qwords, written only on success
;         rsi = a    four qwords, read only
;         rdx = n    four qwords, read only (the modulus)
; Out:  rax = 1 and out = n - (Y mod n) on success,
;       rax = 0 when the Euclidean remainder A is not 1 (no inverse).
;
; Working state:
;   [rsp+0]              saved `out` pointer
;   [rsp+16 .. rsp+47]   A, starts as a copy of n
;   [rsp+48 .. rsp+79]   B, starts as a copy of a
;   rdi:r11:r10:r9:r8    X, five limbs, starts at 1
;   rbp:r15:r14:r13:r12  Y, five limbs, starts at 0
;
; Loop invariant (for odd n):  X*a == B  and  Y*a == -A  (mod n).
; When B reaches 0 with A == 1, Y*a == -1, so the inverse is n - Y.
;
; Uses VEX-encoded vmovdqu for the 32-byte copies; no CPU feature check
; is made in this routine.
;-----------------------------------------------------------------------
global	beeu_mod_inverse_vartime
ALIGN	32
beeu_mod_inverse_vartime:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_beeu_mod_inverse_vartime:
; Win64 argument registers -> System V positions.  rcx, r8 and r9 are
; loaded as if there were six arguments, but all three are overwritten
; below before anything reads them.
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

; Save every register the body uses as working storage.
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	push	rbx
	push	rsi

; 80-byte frame: out pointer at [rsp], A at [rsp+16], B at [rsp+48].
	sub	rsp,80

	mov	QWORD[rsp],rdi		; keep `out`; rdi is reused as X's top limb

; X = 1
	mov	r8,1
	xor	r9,r9
	xor	r10,r10
	xor	r11,r11
	xor	rdi,rdi

; Y = 0
	xor	r12,r12
	xor	r13,r13
	xor	r14,r14
	xor	r15,r15
	xor	rbp,rbp

; B = a
	vmovdqu	xmm0,XMMWORD[rsi]
	vmovdqu	xmm1,XMMWORD[16+rsi]
	vmovdqu	XMMWORD[48+rsp],xmm0
	vmovdqu	XMMWORD[64+rsp],xmm1

; A = n
	vmovdqu	xmm0,XMMWORD[rdx]
	vmovdqu	xmm1,XMMWORD[16+rdx]
	vmovdqu	XMMWORD[16+rsp],xmm0
	vmovdqu	XMMWORD[32+rsp],xmm1

; ---- main loop: runs until B == 0 ------------------------------------
$L$beeu_loop:
	xor	rbx,rbx
	or	rbx,QWORD[48+rsp]
	or	rbx,QWORD[56+rsp]
	or	rbx,QWORD[64+rsp]
	or	rbx,QWORD[72+rsp]
	jz	NEAR $L$beeu_loop_end

; Strip trailing zero bits from B.  rcx is a one-hot mask that walks up
; the low limb of B; for every zero bit found, X is halved modulo n
; (n is added first when X is odd so the shift is exact).  The shift of
; B itself is applied once, after the scan, using the bit count.
	mov	rcx,1

$L$beeu_shift_loop_XB:
	mov	rbx,rcx
	and	rbx,QWORD[48+rsp]
	jnz	NEAR $L$beeu_shift_loop_end_XB

; X odd?  Then X += n before halving.
	mov	rbx,1
	and	rbx,r8
	jz	NEAR $L$shift1_0
	add	r8,QWORD[rdx]
	adc	r9,QWORD[8+rdx]
	adc	r10,QWORD[16+rdx]
	adc	r11,QWORD[24+rdx]
	adc	rdi,0

; X >>= 1 across all five limbs.
$L$shift1_0:
	shrd	r8,r9,1
	shrd	r9,r10,1
	shrd	r10,r11,1
	shrd	r11,rdi,1
	shr	rdi,1

	shl	rcx,1

; Cap a single scan at 27 bit positions (mask reaches bit 27) so the
; mask cannot run off the end of the limb.
	cmp	rcx,0x8000000
	jne	NEAR $L$beeu_shift_loop_XB

$L$beeu_shift_loop_end_XB:
; Turn the one-hot mask into a shift count; zero means B was already odd.
	bsf	rcx,rcx
	test	rcx,rcx
	jz	NEAR $L$beeu_no_shift_XB

; B >>= cl, limb by limb from the bottom, feeding each limb from the
; unshifted copy of the limb above it.
	mov	rax,QWORD[((8+48))+rsp]
	mov	rbx,QWORD[((16+48))+rsp]
	mov	rsi,QWORD[((24+48))+rsp]

	shrd	QWORD[((0+48))+rsp],rax,cl
	shrd	QWORD[((8+48))+rsp],rbx,cl
	shrd	QWORD[((16+48))+rsp],rsi,cl

	shr	rsi,cl
	mov	QWORD[((24+48))+rsp],rsi

$L$beeu_no_shift_XB:

; Same treatment for the (Y, A) pair.
	mov	rcx,1

$L$beeu_shift_loop_YA:
	mov	rbx,rcx
	and	rbx,QWORD[16+rsp]
	jnz	NEAR $L$beeu_shift_loop_end_YA

; Y odd?  Then Y += n before halving.
	mov	rbx,1
	and	rbx,r12
	jz	NEAR $L$shift1_1
	add	r12,QWORD[rdx]
	adc	r13,QWORD[8+rdx]
	adc	r14,QWORD[16+rdx]
	adc	r15,QWORD[24+rdx]
	adc	rbp,0

; Y >>= 1 across all five limbs.
$L$shift1_1:
	shrd	r12,r13,1
	shrd	r13,r14,1
	shrd	r14,r15,1
	shrd	r15,rbp,1
	shr	rbp,1

	shl	rcx,1

; Same 27-position cap as in the B scan.
	cmp	rcx,0x8000000
	jne	NEAR $L$beeu_shift_loop_YA

$L$beeu_shift_loop_end_YA:
	bsf	rcx,rcx
	test	rcx,rcx
	jz	NEAR $L$beeu_no_shift_YA

; A >>= cl.
	mov	rax,QWORD[((8+16))+rsp]
	mov	rbx,QWORD[((16+16))+rsp]
	mov	rsi,QWORD[((24+16))+rsp]

	shrd	QWORD[((0+16))+rsp],rax,cl
	shrd	QWORD[((8+16))+rsp],rbx,cl
	shrd	QWORD[((16+16))+rsp],rsi,cl

	shr	rsi,cl
	mov	QWORD[((24+16))+rsp],rsi

$L$beeu_no_shift_YA:
; rcx:rsi:rbx:rax = B - A.  No borrow means B >= A.
	mov	rax,QWORD[48+rsp]
	mov	rbx,QWORD[56+rsp]
	mov	rsi,QWORD[64+rsp]
	mov	rcx,QWORD[72+rsp]
	sub	rax,QWORD[16+rsp]
	sbb	rbx,QWORD[24+rsp]
	sbb	rsi,QWORD[32+rsp]
	sbb	rcx,QWORD[40+rsp]
	jnc	NEAR $L$beeu_B_bigger_than_A

; A > B:  A = A - B
	mov	rax,QWORD[16+rsp]
	mov	rbx,QWORD[24+rsp]
	mov	rsi,QWORD[32+rsp]
	mov	rcx,QWORD[40+rsp]
	sub	rax,QWORD[48+rsp]
	sbb	rbx,QWORD[56+rsp]
	sbb	rsi,QWORD[64+rsp]
	sbb	rcx,QWORD[72+rsp]
	mov	QWORD[16+rsp],rax
	mov	QWORD[24+rsp],rbx
	mov	QWORD[32+rsp],rsi
	mov	QWORD[40+rsp],rcx

; ... and Y = Y + X
	add	r12,r8
	adc	r13,r9
	adc	r14,r10
	adc	r15,r11
	adc	rbp,rdi
	jmp	NEAR $L$beeu_loop

$L$beeu_B_bigger_than_A:
; B >= A:  B = B - A (the difference is already in registers)
	mov	QWORD[48+rsp],rax
	mov	QWORD[56+rsp],rbx
	mov	QWORD[64+rsp],rsi
	mov	QWORD[72+rsp],rcx

; ... and X = X + Y
	add	r8,r12
	adc	r9,r13
	adc	r10,r14
	adc	r11,r15
	adc	rdi,rbp

	jmp	NEAR $L$beeu_loop

; ---- B == 0: A holds the final remainder -----------------------------
$L$beeu_loop_end:
; rbx = (A[0] - 1) | A[1] | A[2] | A[3]; non-zero means A != 1, in
; which case no inverse is produced.
	mov	rbx,QWORD[16+rsp]
	sub	rbx,1
	or	rbx,QWORD[24+rsp]
	or	rbx,QWORD[32+rsp]
	or	rbx,QWORD[40+rsp]

	jnz	NEAR $L$beeu_err

; r11:r10:r9:r8 = n, used for the final reduction of Y.
	mov	r8,QWORD[rdx]
	mov	r9,QWORD[8+rdx]
	mov	r10,QWORD[16+rdx]
	mov	r11,QWORD[24+rdx]
	xor	rdi,rdi

; Reduce Y below n by repeated subtraction.  Each pass snapshots Y,
; subtracts n, and on borrow restores the low four limbs and leaves the
; loop.  The fifth limb (rbp) is snapshotted but not reloaded; it is
; not read again before it is popped.
$L$beeu_reduction_loop:
	mov	QWORD[16+rsp],r12
	mov	QWORD[24+rsp],r13
	mov	QWORD[32+rsp],r14
	mov	QWORD[40+rsp],r15
	mov	QWORD[48+rsp],rbp

; Y -= n (five limbs)
	sub	r12,r8
	sbb	r13,r9
	sbb	r14,r10
	sbb	r15,r11
	sbb	rbp,0

; Borrow: the subtraction went too far, take the snapshot back.
; cmovc leaves the flags alone, so jnc still tests the same borrow.
	cmovc	r12,QWORD[16+rsp]
	cmovc	r13,QWORD[24+rsp]
	cmovc	r14,QWORD[32+rsp]
	cmovc	r15,QWORD[40+rsp]
	jnc	NEAR $L$beeu_reduction_loop

; result = n - Y
	sub	r8,r12
	sbb	r9,r13
	sbb	r10,r14
	sbb	r11,r15

$L$beeu_save:
; Reload `out` and store the four result limbs.
	mov	rdi,QWORD[rsp]

	mov	QWORD[rdi],r8
	mov	QWORD[8+rdi],r9
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

; Success.
	mov	rax,1
	jmp	NEAR $L$beeu_finish

$L$beeu_err:
; Failure: return 0 and leave `out` untouched.
	xor	rax,rax

$L$beeu_finish:
; Release the frame and restore registers in reverse push order.
	add	rsp,80

	pop	rsi
	pop	rbx
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp

	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret


$L$SEH_end_beeu_mod_inverse_vartime:
