/*
 * Source-viewer navigation residue (not part of the program):
 * Home, Line#, Scopes#, Navigate#, Raw, Download
 */
#include "mips_arch.h"

/*
 * bn_mul_mont -- public entry point; gates on the operand length.
 *
 * Only $9 (num, a word count) is inspected here.  $4..$8 are left
 * untouched and are consumed by bn_mul_mont_internal.
 *
 *   num < 4         : returns with $2 = 0 and $4 = 0
 *   4 <= num <= 16  : branches to bn_mul_mont_internal, which returns
 *                     straight to the caller ($31 is not modified here)
 *   num >= 17       : returns with $2 = 0 and $4 = 0
 *
 * NOTE(review): a zero result presumably tells the caller that nothing
 * was computed and that it must use another implementation -- confirm
 * against the caller.  Both $2 and $4 are written; which of the two the
 * caller reads depends on the ABI in use and cannot be told from here.
 *
 * The whole file is assembled with .set noat, because $1 is used
 * explicitly as a scratch register, and this routine runs under
 * .set noreorder: the instruction after each branch or jump is its
 * delay slot and executes whether or not the branch is taken.
 */
.text

.set	noat
.set	noreorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	slt	$1,$9,4		# $1 = (num < 4)
	bnez	$1,1f		# too short: decline
	li	$2,0		# delay slot: $2 = 0 on every path from here
	slt	$1,$9,17	# on in-order CPU
	bnez	$1,bn_mul_mont_internal	# 4 <= num <= 16: hand over to the worker
	nop			# delay slot
1:	jr	$31
	li	$4,0		# delay slot: $4 = 0 on the decline path
.end	bn_mul_mont

/*
 * bn_mul_mont_internal -- worker behind bn_mul_mont (no .globl, so the
 * symbol is local to this object).
 *
 * Registers on entry, as read by the code below:
 *   $4  rp   destination, num words (written in .Lsub and .Lcopy)
 *   $5  ap   first operand, num words
 *   $6  bp   second operand, num words
 *   $7  np   words subtracted from the accumulator in .Lsub
 *            (the modulus, in Montgomery terms)
 *   $8       pointer to one word; that word is loaded and multiplied with
 *            the low accumulator word to form the per-row factor m
 *            (presumably the Montgomery constant n0 -- confirm with caller)
 *   $9  num  word count; all loops are do-while shaped, and bn_mul_mont
 *            only branches here for 4 <= num <= 16
 *
 * Returns with $2 = 1 and $4 = 1.  $5, $7, $8, $9 and the usual
 * temporaries are clobbered; $16-$23 and $30 are saved and restored.
 *
 * Working registers:
 *   $10/$11  lo0/hi0  running sum and carry of the ap[j]*bi column
 *   $24/$25  lo1/hi1  running sum and carry of the np[j]*m column
 *   $12 aj   $13 bi   $14 nj      current ap, bp and np words
 *   $16/$17  alo/ahi  product ap[j]*bi fetched one step ahead
 *   $18/$19  nlo/nhi  product np[j]*m fetched one step ahead
 *   $15 tp cursor     $20 tj (tp word, later the end pointer &tp[num])
 *   $21 i, $22 j      byte offsets into bp and into ap/np
 *   $23 m             low word of lo0 * (word loaded through $8)
 *   $1, $2            carry and loop-condition scratch
 *
 * Stack: a 14-word frame holding the nine saved registers, and below it
 * a scratch vector tp of num+2 words whose base is rounded down to a
 * 4096-byte boundary.  $30 remembers the frame base so the epilogue can
 * discard tp with one move.
 *
 * The parenthesised dmultu/mflo/mfhi forms are presumably preprocessor
 * macros supplied by mips_arch.h -- confirm there.
 */
.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*8,$31
	.mask	0x40000000|16711680,-8
	dsubu	$29,14*8		# open the fixed frame
	sd	$30,(14-1)*8($29)
	sd	$23,(14-2)*8($29)
	sd	$22,(14-3)*8($29)
	sd	$21,(14-4)*8($29)
	sd	$20,(14-5)*8($29)
	sd	$19,(14-6)*8($29)
	sd	$18,(14-7)*8($29)
	sd	$17,(14-8)*8($29)
	sd	$16,(14-9)*8($29)
	move	$30,$29			# $30 = frame base, used by the epilogue

	.set	reorder
	ld	$8,0($8)		# replace the pointer in $8 by the word it points to
	ld	$13,0($6)	# bp[0]
	ld	$12,0($5)	# ap[0]
	ld	$14,0($7)	# np[0]

	dsubu	$29,2*8	# place for two extra words
	sll	$9,3			# num is a byte count from here on
	li	$1,-4096
	dsubu	$29,$9			# room for num more words
	and	$29,$1			# round tp base down to a 4096-byte boundary

	/* First row (bi = bp[0]): set up columns 0 and 1. */
	dmultu	($12,$13)		# ap[0]*bp[0]
	ld	$17,8($5)		# ap[1]
	ld	$19,8($7)		# np[1]
	mflo	($10,$12,$13)
	mfhi	($11,$12,$13)
	dmultu	($10,$8)
	mflo	($23,$10,$8)		# m = low word of lo0 * loaded constant

	dmultu	($17,$13)		# ap[1]*bp[0], kept in alo/ahi
	mflo	($16,$17,$13)
	mfhi	($17,$17,$13)

	dmultu	($14,$23)		# np[0]*m
	mflo	($24,$14,$23)
	mfhi	($25,$14,$23)
	dmultu	($19,$23)		# np[1]*m, kept in nlo/nhi
	daddu	$24,$10			# lo1 += lo0; this low word is never stored,
	sltu	$1,$24,$10		# only its carry is kept
	daddu	$25,$1
	mflo	($18,$19,$23)
	mfhi	($19,$19,$23)

	move	$15,$29			# tp cursor = &tp[0]
	li	$22,2*8			# j starts at word 2 (words 0 and 1 are in flight)
.align	4
.L1st:
	.set	noreorder
	daddu	$12,$5,$22		# &ap[j]
	daddu	$14,$7,$22		# &np[j]
	ld	$12,($12)		# aj
	ld	$14,($14)		# nj

	dmultu	($12,$13)		# start ap[j]*bi
	daddu	$10,$16,$11		# lo0 = alo + hi0
	daddu	$24,$18,$25		# lo1 = nlo + hi1
	sltu	$1,$10,$11		# carry out of lo0
	sltu	$2,$24,$25		# carry out of lo1
	daddu	$11,$17,$1		# hi0 = ahi + carry
	daddu	$25,$19,$2		# hi1 = nhi + carry
	mflo	($16,$12,$13)
	mfhi	($17,$12,$13)

	daddu	$24,$10			# lo1 += lo0
	sltu	$1,$24,$10
	dmultu	($14,$23)		# start np[j]*m
	daddu	$25,$1
	addu	$22,8			# j advances one word
	sd	$24,($15)		# store the finished tp word
	sltu	$2,$22,$9		# words left in this row?
	mflo	($18,$14,$23)
	mfhi	($19,$14,$23)

	bnez	$2,.L1st
	daddu	$15,8			# delay slot: advance tp cursor
	.set	reorder

	/* Drain the products still in flight and close the first row. */
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1

	daddu	$24,$18,$25
	sltu	$2,$24,$25
	daddu	$25,$19,$2
	daddu	$24,$10
	sltu	$1,$24,$10
	daddu	$25,$1

	sd	$24,($15)		# tp[num-2]

	daddu	$25,$11			# hi1 += hi0
	sltu	$1,$25,$11
	sd	$25,8($15)		# tp[num-1]
	sd	$1,2*8($15)		# tp[num] = carry out of the row

	li	$21,8			# i starts at word 1 of bp
.align	4
.Louter:
	/* Remaining rows: same column scheme, with the previous tp added in. */
	daddu	$13,$6,$21		# &bp[i]
	ld	$13,($13)		# bi
	ld	$12,($5)		# ap[0]
	ld	$17,8($5)		# ap[1]
	ld	$20,($29)		# tj = tp[0]

	dmultu	($12,$13)		# ap[0]*bi
	ld	$14,($7)		# np[0]
	ld	$19,8($7)		# np[1]
	mflo	($10,$12,$13)
	mfhi	($11,$12,$13)
	daddu	$10,$20			# lo0 += tp[0]
	dmultu	($10,$8)
	sltu	$1,$10,$20
	daddu	$11,$1
	mflo	($23,$10,$8)		# m for this row

	dmultu	($17,$13)		# ap[1]*bi, kept in alo/ahi
	mflo	($16,$17,$13)
	mfhi	($17,$17,$13)

	dmultu	($14,$23)		# np[0]*m
	mflo	($24,$14,$23)
	mfhi	($25,$14,$23)

	dmultu	($19,$23)		# np[1]*m, kept in nlo/nhi
	daddu	$24,$10			# lo1 += lo0; low word never stored
	sltu	$1,$24,$10
	daddu	$25,$1
	mflo	($18,$19,$23)
	mfhi	($19,$19,$23)

	move	$15,$29			# tp cursor = &tp[0]
	li	$22,2*8
	ld	$20,8($15)		# tj = tp[1]
.align	4
.Linner:
	.set	noreorder
	daddu	$12,$5,$22		# &ap[j]
	daddu	$14,$7,$22		# &np[j]
	ld	$12,($12)		# aj
	ld	$14,($14)		# nj

	dmultu	($12,$13)		# start ap[j]*bi
	daddu	$10,$16,$11		# lo0 = alo + hi0
	daddu	$24,$18,$25		# lo1 = nlo + hi1
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1
	daddu	$25,$19,$2
	mflo	($16,$12,$13)
	mfhi	($17,$12,$13)

	daddu	$10,$20			# lo0 += tj
	addu	$22,8			# j advances one word
	dmultu	($14,$23)		# start np[j]*m
	sltu	$1,$10,$20
	daddu	$24,$10			# lo1 += lo0
	daddu	$11,$1
	sltu	$2,$24,$10
	ld	$20,2*8($15)		# tj for the next column
	daddu	$25,$2
	sltu	$1,$22,$9		# words left in this row?
	mflo	($18,$14,$23)
	mfhi	($19,$14,$23)
	sd	$24,($15)		# store the finished tp word
	bnez	$1,.Linner
	daddu	$15,8			# delay slot: advance tp cursor
	.set	reorder

	/* Drain the in-flight products and close the row. */
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1
	daddu	$10,$20
	sltu	$2,$10,$20
	daddu	$11,$2

	ld	$20,2*8($15)		# previous tp[num]
	daddu	$24,$18,$25
	sltu	$1,$24,$25
	daddu	$25,$19,$1
	daddu	$24,$10
	sltu	$2,$24,$10
	daddu	$25,$2
	sd	$24,($15)		# tp[num-2]

	daddu	$24,$25,$11		# hi1 + hi0
	sltu	$25,$24,$11
	daddu	$24,$20			# ... + previous tp[num]
	sltu	$1,$24,$20
	daddu	$25,$1
	sd	$24,8($15)		# tp[num-1]
	sd	$25,2*8($15)		# tp[num]; the value also stays live in $25

	addu	$21,8			# i advances one word
	sltu	$2,$21,$9
	bnez	$2,.Louter		# reorder mode: the assembler fills the delay slot

	/*
	 * Final reduction.  rp = tp - np with a running borrow; the top word
	 * and the borrow then form a mask that selects, per word, either the
	 * difference already in rp or the original tp word.
	 */
	.set	noreorder
	daddu	$20,$29,$9	# &tp[num]
	move	$15,$29
	move	$5,$29			# $5 is not read again after this point
	li	$11,0		# clear borrow bit

.align	4
.Lsub:	ld	$10,($15)
	ld	$24,($7)
	daddu	$15,8
	daddu	$7,8
	dsubu	$24,$10,$24	# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow from the subtraction
	dsubu	$10,$24,$11		# subtract the incoming borrow
	sgtu	$11,$10,$24		# borrow from that step
	sd	$10,($4)
	or	$11,$1			# outgoing borrow
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	daddu	$4,8			# delay slot: advance rp

	dsubu	$11,$25,$11	# handle upmost overflow bit
	move	$15,$29
	dsubu	$4,$9	# restore rp
	not	$25,$11			# $11 masks the tp word, $25 masks the rp word

.Lcopy:	ld	$14,($15)	# conditional move
	ld	$12,($4)
	sd	$0,($15)		# overwrite the tp word with zero once read
	daddu	$15,8
	and	$14,$11
	and	$12,$25
	or	$12,$14
	sltu	$1,$15,$20
	sd	$12,($4)
	bnez	$1,.Lcopy
	daddu	$4,8			# delay slot: advance rp

	li	$4,1
	li	$2,1			# report that the product was computed

	.set	noreorder
	move	$29,$30			# drop tp, back to the frame base
	ld	$30,(14-1)*8($29)
	ld	$23,(14-2)*8($29)
	ld	$22,(14-3)*8($29)
	ld	$21,(14-4)*8($29)
	ld	$20,(14-5)*8($29)
	ld	$19,(14-6)*8($29)
	ld	$18,(14-7)*8($29)
	ld	$17,(14-8)*8($29)
	ld	$16,(14-9)*8($29)
	jr	$31
	daddu	$29,14*8		# delay slot: release the frame
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"