• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Assembler mode for the code that follows:
#   .set noat      - $1 ($at) is used explicitly as a scratch register, so the
#                    assembler must not claim it for macro expansion.
#   .set noreorder - branch delay slots are filled by hand; the instruction
#                    written after each branch/jump IS its delay slot.
.text

.set	noat
.set	noreorder

# bn_mul_mont - entry stub that range-checks the operand length.
#
# Reads:   $9 = operand length (bn_mul_mont_internal scales it by 4 and walks
#               the operands with lw, i.e. it is a count of 32-bit words).
#          All other argument registers are passed through untouched.
# Returns: $2 = 0 (and $4 = 0) when the length is < 4 or >= 17; nothing else
#          is done in that case.  Otherwise control is transferred, without
#          linking, to bn_mul_mont_internal, which returns to the original
#          caller through $31 with $2 = 1.
# Clobbers: $1, $2, $4.
#
# NOTE(review): the length is taken from register $9.  Under the o32 calling
# convention the fifth and sixth arguments live on the stack, not in $8/$9 -
# confirm which ABI this file is built for.
# NOTE(review): presumably the caller falls back to another implementation
# when 0 is returned - verify against the caller.
.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	slt	$1,$9,4			# $1 = (length < 4)
	bnez	$1,1f			# too short -> return 0
	li	$2,0			# delay slot: runs on both paths, preset return value
	slt	$1,$9,17	# on in-order CPU
	# Plain bnez instead of the branch-likely form (bnezl): branch-likely is
	# deprecated and absent from MIPS Release 6, and because the delay slot
	# only holds a nop the two forms behave identically here.
	bnez	$1,bn_mul_mont_internal	# 4 <= length < 17 -> do the work
	nop				# delay slot
1:	jr	$31
	li	$4,0			# delay slot
.end	bn_mul_mont
19
# bn_mul_mont_internal - word-serial Montgomery multiplication on 32-bit words.
# Within this file it is reached only by the branch in bn_mul_mont; the symbol
# is not exported with .globl here.
#
# Inputs, as the code below uses them:
#   $4  rp  - destination, num words
#   $5  ap  - first operand
#   $6  bp  - second operand
#   $7  np  - modulus
#   $8      - pointer to a single word; that word is loaded once and every
#             pass derives m = low word of (t0 * word) from it
#   $9  num - length in words; converted to a byte count below
#   $31     - return address
# Result: rp[0..num-1] written, $2 = 1 and $4 = 1.
#
# Register roles inside the routine:
#   $10/$11  lo/hi accumulator of the ap[j]*bp[i] chain
#   $24/$25  lo/hi accumulator of the np[j]*m chain
#   $12 ap[j]   $13 bp[i]   $14 np[j]   $15 tp cursor
#   $16/$17  latest ap[j]*bp[i] product (lo/hi)
#   $18/$19  latest np[j]*m product (lo/hi)
#   $20 tp[j] (later &tp[num])   $21 i (byte offset)   $22 j (byte offset)
#   $23 m     $1,$2 carry/compare scratch
#   $29 stack pointer = base of the temporary vector tp
#   $30 frame pointer = stack pointer right after the prologue
#
# NOTE(review): arguments five and six are taken from $8/$9 while every load,
# store and register save is 32 bits wide.  Under o32 those two arguments are
# passed on the stack - confirm the calling convention this build targets.
20.align	5
21.ent	bn_mul_mont_internal
22bn_mul_mont_internal:
23	.frame	$30,14*4,$31
	# Save mask: bit 30 ($30) plus 16711680 = 0x00ff0000 (bits 16..23, $16-$23).
24	.mask	0x40000000|16711680,-4
	# Prologue: 14-word frame; $30 and $23..$16 go into the top nine slots.
25	sub $29,14*4
26	sw	$30,(14-1)*4($29)
27	sw	$23,(14-2)*4($29)
28	sw	$22,(14-3)*4($29)
29	sw	$21,(14-4)*4($29)
30	sw	$20,(14-5)*4($29)
31	sw	$19,(14-6)*4($29)
32	sw	$18,(14-7)*4($29)
33	sw	$17,(14-8)*4($29)
34	sw	$16,(14-9)*4($29)
	# Remember the frame base; $29 is moved again below to make room for tp.
35	move	$30,$29
36
	# From here the assembler may schedule instructions and fill delay slots.
37	.set	reorder
	# Replace the pointer in $8 by the word it points to.
38	lw	$8,0($8)
39	lw	$13,0($6)	# bp[0]
40	lw	$12,0($5)	# ap[0]
41	lw	$14,0($7)	# np[0]
42
	# Allocate tp on the stack: num words plus two, then round the stack
	# pointer down to a 4096-byte boundary.  $9 becomes num*4 (bytes).
43	sub $29,2*4	# place for two extra words
44	sll	$9,2
45	li	$1,-4096
46	sub $29,$9
47	and	$29,$1
48
	# ---- First pass (i = 0): tp is not read yet ----
	# $11:$10 = ap[0]*bp[0]; ap[1] and np[1] are fetched while it runs.
49	multu	$12,$13
50	lw	$16,4($5)
51	lw	$18,4($7)
52	mflo	$10
53	mfhi	$11
	# m ($23) = low word of ($10 * word loaded from the $8 pointer).
54	multu	$10,$8
55	mflo	$23
56
	# $17:$16 = ap[1]*bp[0]
57	multu	$16,$13
58	mflo	$16
59	mfhi	$17
60
	# $25:$24 = np[0]*m
61	multu	$14,$23
62	mflo	$24
63	mfhi	$25
	# Start np[1]*m, and meanwhile add the two low words.  The low word of
	# that sum is never stored; only its carry is folded into $25.
64	multu	$18,$23
65	addu	$24,$10
66	sltu	$1,$24,$10
67	addu	$25,$1
68	mflo	$18
69	mfhi	$19
70
	# tp cursor = base of tp; j starts at word 2 (byte offset 8).
71	move	$15,$29
72	li	$22,2*4
73.align	4
74.L1st:
	# Loop body is hand-scheduled: the instruction after bnez is its delay slot.
75	.set	noreorder
	# Fetch ap[j] and np[j].
76	add $12,$5,$22
77	add $14,$7,$22
78	lw	$12,($12)
79	lw	$14,($14)
80
	# Start ap[j]*bp[0]; meanwhile fold the previous products into both
	# chains: lo = product_lo + hi, hi = product_hi + carry.
81	multu	$12,$13
82	addu	$10,$16,$11
83	addu	$24,$18,$25
84	sltu	$1,$10,$11
85	sltu	$2,$24,$25
86	addu	$11,$17,$1
87	addu	$25,$19,$2
88	mflo	$16
89	mfhi	$17
90
	# Merge the two chains ($24 += $10 with carry into $25), start np[j]*m,
	# advance j and store the finished word at the tp cursor.
91	addu	$24,$10
92	sltu	$1,$24,$10
93	multu	$14,$23
94	addu	$25,$1
95	addu	$22,4
96	sw	$24,($15)
	# Continue while j < num*4.
97	sltu	$2,$22,$9
98	mflo	$18
99	mfhi	$19
100
101	bnez	$2,.L1st
	# Delay slot: advance the tp cursor on every iteration.
102	add $15,4
103	.set	reorder
104
	# First-pass tail: consume the last pair of products.
105	addu	$10,$16,$11
106	sltu	$1,$10,$11
107	addu	$11,$17,$1
108
109	addu	$24,$18,$25
110	sltu	$2,$24,$25
111	addu	$25,$19,$2
112	addu	$24,$10
113	sltu	$1,$24,$10
114	addu	$25,$1
115
116	sw	$24,($15)
117
	# Top two words: sum of both high halves, then the carry out of that sum.
118	addu	$25,$11
119	sltu	$1,$25,$11
120	sw	$25,4($15)
121	sw	$1,2*4($15)
122
	# ---- Remaining passes: i = 1 .. num-1 (byte offset in $21) ----
123	li	$21,4
124.align	4
125.Louter:
	# Load bp[i], ap[0], ap[1] and the current tp[0].
126	add $13,$6,$21
127	lw	$13,($13)
128	lw	$12,($5)
129	lw	$16,4($5)
130	lw	$20,($29)
131
	# $11:$10 = ap[0]*bp[i] + tp[0]; np[0] and np[1] are fetched meanwhile.
132	multu	$12,$13
133	lw	$14,($7)
134	lw	$18,4($7)
135	mflo	$10
136	mfhi	$11
137	addu	$10,$20
	# m for this pass, taken from the low word after tp[0] has been added.
138	multu	$10,$8
139	sltu	$1,$10,$20
140	addu	$11,$1
141	mflo	$23
142
	# $17:$16 = ap[1]*bp[i]
143	multu	$16,$13
144	mflo	$16
145	mfhi	$17
146
	# $25:$24 = np[0]*m
147	multu	$14,$23
148	mflo	$24
149	mfhi	$25
150
	# Start np[1]*m; as in the first pass only the carry of the low-word sum
	# is kept.
151	multu	$18,$23
152	addu	$24,$10
153	sltu	$1,$24,$10
154	addu	$25,$1
155	mflo	$18
156	mfhi	$19
157
	# Reset the tp cursor and j, and preload tp[1] for the first iteration.
158	move	$15,$29
159	li	$22,2*4
160	lw	$20,4($15)
161.align	4
162.Linner:
	# Same schedule as .L1st, with the previously loaded tp word ($20) added
	# into the ap*bp chain.
163	.set	noreorder
164	add $12,$5,$22
165	add $14,$7,$22
166	lw	$12,($12)
167	lw	$14,($14)
168
169	multu	$12,$13
170	addu	$10,$16,$11
171	addu	$24,$18,$25
172	sltu	$1,$10,$11
173	sltu	$2,$24,$25
174	addu	$11,$17,$1
175	addu	$25,$19,$2
176	mflo	$16
177	mfhi	$17
178
	# Add the tp word with carry into $11, merge the chains, and load the
	# next tp word (two words ahead of the cursor) before the cursor moves.
179	addu	$10,$20
180	addu	$22,4
181	multu	$14,$23
182	sltu	$1,$10,$20
183	addu	$24,$10
184	addu	$11,$1
185	sltu	$2,$24,$10
186	lw	$20,2*4($15)
187	addu	$25,$2
	# Continue while j < num*4.
188	sltu	$1,$22,$9
189	mflo	$18
190	mfhi	$19
191	sw	$24,($15)
192	bnez	$1,.Linner
	# Delay slot: advance the tp cursor.
193	add $15,4
194	.set	reorder
195
	# Pass tail: last pair of products plus the pending tp word.
196	addu	$10,$16,$11
197	sltu	$1,$10,$11
198	addu	$11,$17,$1
199	addu	$10,$20
200	sltu	$2,$10,$20
201	addu	$11,$2
202
	# Fetch the old top word of tp before it is overwritten below.
203	lw	$20,2*4($15)
204	addu	$24,$18,$25
205	sltu	$1,$24,$25
206	addu	$25,$19,$1
207	addu	$24,$10
208	sltu	$2,$24,$10
209	addu	$25,$2
210	sw	$24,($15)
211
	# New top two words: both high halves plus the old top word; $25 ends up
	# holding the carry out, which is also stored as the new top word.
212	addu	$24,$25,$11
213	sltu	$25,$24,$11
214	addu	$24,$20
215	sltu	$1,$24,$20
216	addu	$25,$1
217	sw	$24,4($15)
218	sw	$25,2*4($15)
219
	# Next i; loop while i < num*4.  Still in reorder mode, so the assembler
	# provides the delay slot of this branch.
220	addu	$21,4
221	sltu	$2,$21,$9
222	bnez	$2,.Louter
223
	# ---- Final reduction: rp = tp - np, then pick tp or rp without branching ----
224	.set	noreorder
225	add $20,$29,$9	# &tp[num]
226	move	$15,$29
227	move	$5,$29
228	li	$11,0		# clear borrow bit
229
230.align	4
	# rp[k] = tp[k] - np[k] - borrow for k = 0..num-1; $11 carries the borrow.
231.Lsub:	lw	$10,($15)
232	lw	$24,($7)
233	add $15,4
234	add $7,4
235	subu	$24,$10,$24	# tp[i]-np[i]
	# Borrow out of the first subtraction: result greater than the minuend.
236	sgtu	$1,$24,$10
237	subu	$10,$24,$11
	# Borrow out of subtracting the incoming borrow.
238	sgtu	$11,$10,$24
239	sw	$10,($4)
240	or	$11,$1
241	sltu	$1,$15,$20
242	bnez	$1,.Lsub
	# Delay slot: advance rp.
243	add $4,4
244
	# $25 still holds the top word stored by the last pass.  $11 becomes
	# top - borrow: all ones selects tp, zero selects the difference in rp.
245	subu	$11,$25,$11	# handle upmost overflow bit
246	move	$15,$29
247	sub $4,$9	# restore rp
248	not	$25,$11
249
	# Masked pointer select instead of a conditional branch.
250	and	$5,$11,$29
251	and	$6,$25,$4
252	or	$5,$5,$6	# ap=borrow?tp:rp
253
254.align	4
	# Copy num words from the selected source to rp and overwrite
	# tp[0..num-1] with zeros in the same sweep.
255.Lcopy:	lw	$12,($5)
256	add $5,4
257	sw	$0,($15)
258	add $15,4
259	sltu	$1,$15,$20
260	sw	$12,($4)
261	bnez	$1,.Lcopy
	# Delay slot: advance rp.
262	add $4,4
263
	# Return value 1 in $2; $4 is set to 1 as well.
264	li	$4,1
265	li	$2,1
266
	# Epilogue (noreorder is already in effect from before .Lsub): drop tp by
	# restoring the stack pointer from $30, then reload the saved registers.
267	.set	noreorder
268	move	$29,$30
269	lw	$30,(14-1)*4($29)
270	lw	$23,(14-2)*4($29)
271	lw	$22,(14-3)*4($29)
272	lw	$21,(14-4)*4($29)
273	lw	$20,(14-5)*4($29)
274	lw	$19,(14-6)*4($29)
275	lw	$18,(14-7)*4($29)
276	lw	$17,(14-8)*4($29)
277	lw	$16,(14-9)*4($29)
278	jr	$31
	# Delay slot: release the 14-word frame.
279	add $29,14*4
280.end	bn_mul_mont_internal
# NUL-terminated identification string emitted into the read-only data
# section.  It carries no label, so nothing in this file refers to it by name.
281.rdata
282.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"
283