• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1.set	mips2
2.rdata
3.asciiz	"mips3.s, Version 1.2"
4.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5
6.text
7.set	noat
8
# bn_mul_add_words -- exported entry point.
# Register use, as seen in bn_mul_add_words_internal:
#   $4 = words that are read, accumulated into and written back
#   $5 = source words, $6 = signed word count, $7 = multiplier
# A count <= 0 returns at once with $2 = 0; otherwise control transfers to
# bn_mul_add_words_internal with the carry register $2 already cleared.
9.align	5
10.globl	bn_mul_add_words
11.ent	bn_mul_add_words
12bn_mul_add_words:
13	.set	noreorder
14	bgtz	$6,bn_mul_add_words_internal
15	move	$2,$0	# delay slot, runs on both paths: carry = 0
16	jr	$31
17	move	$4,$2	# delay slot: copy the result into $4 as well
18.end	bn_mul_add_words
19
# bn_mul_add_words_internal -- body of bn_mul_add_words, entered with $6 > 0
# and $2 = 0.  For each word: the product of the word at $5 and $7, plus the
# running carry in $2, is added to the word at $4 and stored back; whatever
# overflows the word becomes the next carry.
# Returns the final carry in $2 (copied to $4 in the return delay slot).
# Advances $4 and $5, counts $6 down.
# Scratch: $1 ($at, allowed by the .set noat near the top of the file),
#          $3 (constant -4), $8-$15.
20.align	5
21.ent	bn_mul_add_words_internal
22bn_mul_add_words_internal:
23	.set	reorder
24	li	$3,-4
25	and	$8,$6,$3	# $8 = count rounded down to a multiple of four
26	lw	$12,0($5)
27	beqz	$8,.L_bn_mul_add_words_tail
28
	# Main loop: four words per pass.  Source words live in $12,$14,$8,$10
	# and destination words in $13,$15,$9,$11.  Per word:
	#   dst += carry; carry = (dst < carry)
	#   dst += lo;    carry += hi + (dst < lo)
	# Each multu is issued before the previous word's final carry fix-up.
29.L_bn_mul_add_words_loop:
30	multu	$12,$7
31	lw	$13,0($4)
32	lw	$14,4($5)
33	lw	$15,4($4)
34	lw	$8,2*4($5)
35	lw	$9,2*4($4)
36	addu	$13,$2
37	sltu	$2,$13,$2	# All manuals say it "compares 32-bit
38				# values", but it seems to work fine
39				# even on 64-bit registers.
40	mflo	$1
41	mfhi	$12
42	addu	$13,$1
43	addu	$2,$12
44	 multu	$14,$7
45	sltu	$1,$13,$1
46	sw	$13,0($4)
47	addu	$2,$1
48
49	lw	$10,3*4($5)
50	lw	$11,3*4($4)
51	addu	$15,$2
52	sltu	$2,$15,$2
53	mflo	$1
54	mfhi	$14
55	addu	$15,$1
56	addu	$2,$14
57	 multu	$8,$7
58	sltu	$1,$15,$1
59	sw	$15,4($4)
60	addu	$2,$1
61
	# Pointers are bumped here, so the remaining two stores use negative
	# offsets from the already-advanced $4.
62	subu	$6,4
63	addu $4,4*4
64	addu $5,4*4
65	addu	$9,$2
66	sltu	$2,$9,$2
67	mflo	$1
68	mfhi	$8
69	addu	$9,$1
70	addu	$2,$8
71	 multu	$10,$7
72	sltu	$1,$9,$1
73	sw	$9,-2*4($4)
74	addu	$2,$1
75
76
77	and	$8,$6,$3	# $8 reused: are four or more words still left?
78	addu	$11,$2
79	sltu	$2,$11,$2
80	mflo	$1
81	mfhi	$10
82	addu	$11,$1
83	addu	$2,$10
84	sltu	$1,$11,$1
85	sw	$11,-4($4)
86	addu	$2,$1
87	.set	noreorder
88	bgtzl	$8,.L_bn_mul_add_words_loop
89	lw	$12,0($5)	# delay slot of a branch-likely: runs only when looping
90
91	beqz	$6,.L_bn_mul_add_words_return
92	nop
93
	# Tail: one to three leftover words, handled one at a time with the
	# same carry arithmetic as the main loop.
94.L_bn_mul_add_words_tail:
95	.set	reorder
96	lw	$12,0($5)
97	multu	$12,$7
98	lw	$13,0($4)
99	subu	$6,1
100	addu	$13,$2
101	sltu	$2,$13,$2
102	mflo	$1
103	mfhi	$12
104	addu	$13,$1
105	addu	$2,$12
106	sltu	$1,$13,$1
107	sw	$13,0($4)
108	addu	$2,$1
109	beqz	$6,.L_bn_mul_add_words_return
110
111	lw	$12,4($5)
112	multu	$12,$7
113	lw	$13,4($4)
114	subu	$6,1
115	addu	$13,$2
116	sltu	$2,$13,$2
117	mflo	$1
118	mfhi	$12
119	addu	$13,$1
120	addu	$2,$12
121	sltu	$1,$13,$1
122	sw	$13,4($4)
123	addu	$2,$1
124	beqz	$6,.L_bn_mul_add_words_return
125
	# Third leftover word is the last possible one, so $6 is not updated.
126	lw	$12,2*4($5)
127	multu	$12,$7
128	lw	$13,2*4($4)
129	addu	$13,$2
130	sltu	$2,$13,$2
131	mflo	$1
132	mfhi	$12
133	addu	$13,$1
134	addu	$2,$12
135	sltu	$1,$13,$1
136	sw	$13,2*4($4)
137	addu	$2,$1
138
139.L_bn_mul_add_words_return:
140	.set	noreorder
141	jr	$31
142	move	$4,$2	# delay slot: copy the carry into $4 as well
143.end	bn_mul_add_words_internal
144
# bn_mul_words -- exported entry point.
# Register use, as seen in bn_mul_words_internal:
#   $4 = destination words, $5 = source words,
#   $6 = signed word count, $7 = multiplier
# A count <= 0 returns at once with $2 = 0; otherwise control transfers to
# bn_mul_words_internal with the carry register $2 already cleared.
145.align	5
146.globl	bn_mul_words
147.ent	bn_mul_words
148bn_mul_words:
149	.set	noreorder
150	bgtz	$6,bn_mul_words_internal
151	move	$2,$0	# delay slot, runs on both paths: carry = 0
152	jr	$31
153	move	$4,$2	# delay slot: copy the result into $4 as well
154.end	bn_mul_words
155
# bn_mul_words_internal -- body of bn_mul_words, entered with $6 > 0 and
# $2 = 0.  For each word: low half of (source word * $7) plus the running
# carry in $2 is stored to the destination; the high half plus the carry-out
# of that addition becomes the next carry.
# Returns the final carry in $2 (copied to $4 in the return delay slot).
# Advances $4 and $5, counts $6 down.
# Scratch: $1 ($at), $3 (constant -4), $8-$15.
156.align	5
157.ent	bn_mul_words_internal
158bn_mul_words_internal:
159	.set	reorder
160	li	$3,-4
161	and	$8,$6,$3	# $8 = count rounded down to a multiple of four
162	lw	$12,0($5)
163	beqz	$8,.L_bn_mul_words_tail
164
	# Main loop: four words per pass, source words in $12,$14,$8,$10.
	# Per word: $2 += lo; c = ($2 < lo); store $2; $2 = c + hi.
165.L_bn_mul_words_loop:
166	multu	$12,$7
167	lw	$14,4($5)
168	lw	$8,2*4($5)
169	lw	$10,3*4($5)
170	mflo	$1
171	mfhi	$12
172	addu	$2,$1
173	sltu	$13,$2,$1
174	 multu	$14,$7
175	sw	$2,0($4)
176	addu	$2,$13,$12
177
	# Pointers are bumped here; the remaining stores use negative offsets.
178	subu	$6,4
179	addu $4,4*4
180	addu $5,4*4
181	mflo	$1
182	mfhi	$14
183	addu	$2,$1
184	sltu	$15,$2,$1
185	 multu	$8,$7
186	sw	$2,-3*4($4)
187	addu	$2,$15,$14
188
189	mflo	$1
190	mfhi	$8
191	addu	$2,$1
192	sltu	$9,$2,$1
193	 multu	$10,$7
194	sw	$2,-2*4($4)
195	addu	$2,$9,$8
196
197	and	$8,$6,$3	# $8 reused: are four or more words still left?
198	mflo	$1
199	mfhi	$10
200	addu	$2,$1
201	sltu	$11,$2,$1
202	sw	$2,-4($4)
203	addu	$2,$11,$10
204	.set	noreorder
205	bgtzl	$8,.L_bn_mul_words_loop
206	lw	$12,0($5)	# delay slot of a branch-likely: runs only when looping
207
208	beqz	$6,.L_bn_mul_words_return
209	nop
210
	# Tail: one to three leftover words, one at a time.
211.L_bn_mul_words_tail:
212	.set	reorder
213	lw	$12,0($5)
214	multu	$12,$7
215	subu	$6,1
216	mflo	$1
217	mfhi	$12
218	addu	$2,$1
219	sltu	$13,$2,$1
220	sw	$2,0($4)
221	addu	$2,$13,$12
222	beqz	$6,.L_bn_mul_words_return
223
224	lw	$12,4($5)
225	multu	$12,$7
226	subu	$6,1
227	mflo	$1
228	mfhi	$12
229	addu	$2,$1
230	sltu	$13,$2,$1
231	sw	$2,4($4)
232	addu	$2,$13,$12
233	beqz	$6,.L_bn_mul_words_return
234
	# Third leftover word is the last possible one, so $6 is not updated.
235	lw	$12,2*4($5)
236	multu	$12,$7
237	mflo	$1
238	mfhi	$12
239	addu	$2,$1
240	sltu	$13,$2,$1
241	sw	$2,2*4($4)
242	addu	$2,$13,$12
243
244.L_bn_mul_words_return:
245	.set	noreorder
246	jr	$31
247	move	$4,$2	# delay slot: copy the carry into $4 as well
248.end	bn_mul_words_internal
249
# bn_sqr_words -- exported entry point.
# Register use, as seen in bn_sqr_words_internal:
#   $4 = destination (two words written per source word)
#   $5 = source words, $6 = signed word count
# A count <= 0 returns at once; otherwise control transfers to
# bn_sqr_words_internal.  $2 is cleared on both paths and the internal
# routine never writes it, so $2 (and $4) read 0 on return.
250.align	5
251.globl	bn_sqr_words
252.ent	bn_sqr_words
253bn_sqr_words:
254	.set	noreorder
255	bgtz	$6,bn_sqr_words_internal
256	move	$2,$0	# delay slot, runs on both paths
257	jr	$31
258	move	$4,$2	# delay slot
259.end	bn_sqr_words
260
# bn_sqr_words_internal -- body of bn_sqr_words, entered with $6 > 0.
# Each source word at $5 is multiplied by itself; the low half of the square
# is stored at the even destination slot and the high half at the following
# odd slot.  No carry is propagated between words.
# Advances $4 (by eight words per pass) and $5 (by four), counts $6 down.
# Scratch: $3 (constant -4), $8-$15.
261.align	5
262.ent	bn_sqr_words_internal
263bn_sqr_words_internal:
264	.set	reorder
265	li	$3,-4
266	and	$8,$6,$3	# $8 = count rounded down to a multiple of four
267	lw	$12,0($5)
268	beqz	$8,.L_bn_sqr_words_tail
269
	# Main loop: four source words ($12,$14,$8,$10) per pass.
270.L_bn_sqr_words_loop:
271	multu	$12,$12
272	lw	$14,4($5)
273	lw	$8,2*4($5)
274	lw	$10,3*4($5)
275	mflo	$13
276	mfhi	$12
277	sw	$13,0($4)
278	sw	$12,4($4)
279
280	multu	$14,$14
281	subu	$6,4
282	addu $4,8*4
283	addu $5,4*4
	# $4 already points past this pass's output, hence negative offsets.
284	mflo	$15
285	mfhi	$14
286	sw	$15,-6*4($4)
287	sw	$14,-5*4($4)
288
289	multu	$8,$8
290	mflo	$9
291	mfhi	$8
292	sw	$9,-4*4($4)
293	sw	$8,-3*4($4)
294
295
296	multu	$10,$10
297	and	$8,$6,$3	# $8 reused: are four or more words still left?
298	mflo	$11
299	mfhi	$10
300	sw	$11,-2*4($4)
301	sw	$10,-4($4)
302
303	.set	noreorder
304	bgtzl	$8,.L_bn_sqr_words_loop
305	lw	$12,0($5)	# delay slot of a branch-likely: runs only when looping
306
307	beqz	$6,.L_bn_sqr_words_return
308	nop
309
	# Tail: one to three leftover source words.
310.L_bn_sqr_words_tail:
311	.set	reorder
312	lw	$12,0($5)
313	multu	$12,$12
314	subu	$6,1
315	mflo	$13
316	mfhi	$12
317	sw	$13,0($4)
318	sw	$12,4($4)
319	beqz	$6,.L_bn_sqr_words_return
320
321	lw	$12,4($5)
322	multu	$12,$12
323	subu	$6,1
324	mflo	$13
325	mfhi	$12
326	sw	$13,2*4($4)
327	sw	$12,3*4($4)
328	beqz	$6,.L_bn_sqr_words_return
329
	# Third leftover word is the last possible one, so $6 is not updated.
330	lw	$12,2*4($5)
331	multu	$12,$12
332	mflo	$13
333	mfhi	$12
334	sw	$13,4*4($4)
335	sw	$12,5*4($4)
336
337.L_bn_sqr_words_return:
338	.set	noreorder
339	jr	$31
340	move	$4,$2	# delay slot; $2 is not written anywhere in this routine
341
342.end	bn_sqr_words_internal
343
# bn_add_words -- exported entry point.
# Register use, as seen in bn_add_words_internal:
#   $4 = destination words, $5 = first operand words,
#   $6 = second operand words, $7 = signed word count
# A count <= 0 returns at once with $2 = 0; otherwise control transfers to
# bn_add_words_internal with the carry register $2 already cleared.
344.align	5
345.globl	bn_add_words
346.ent	bn_add_words
347bn_add_words:
348	.set	noreorder
349	bgtz	$7,bn_add_words_internal
350	move	$2,$0	# delay slot, runs on both paths: carry = 0
351	jr	$31
352	move	$4,$2	# delay slot: copy the result into $4 as well
353.end	bn_add_words
354
# bn_add_words_internal -- body of bn_add_words, entered with $7 > 0 and
# $2 = 0.  Word-wise addition with carry: each destination word receives
# (word at $5) + (word at $6) + carry-in.
# Returns the final carry in $2 (copied to $4 in the return delay slot).
# Advances $4, $5 and $6, counts $7 down.
# Scratch: $1 ($at), $3 (constant -4), $8-$15, $24, $25.
355.align	5
356.ent	bn_add_words_internal
357bn_add_words_internal:
358	.set	reorder
359	li	$3,-4
360	and	$1,$7,$3	# $1 = count rounded down to a multiple of four
361	lw	$12,0($5)
362	beqz	$1,.L_bn_add_words_tail
363
	# Main loop: four words per pass.  Loads are interleaved with the
	# pointer bumps, so offsets are positive before a bump and negative
	# after it.  $12-$15 hold the $5-side words, $8-$11 the $6-side words.
364.L_bn_add_words_loop:
365	lw	$8,0($6)
366	subu	$7,4
367	lw	$13,4($5)
368	and	$1,$7,$3	# are four or more words left after this pass?
369	lw	$14,2*4($5)
370	addu $6,4*4
371	lw	$15,3*4($5)
372	addu $4,4*4
373	lw	$9,-3*4($6)
374	addu $5,4*4
375	lw	$10,-2*4($6)
376	lw	$11,-4($6)
	# Per word: s = a + b; c1 = (s < a); r = s + carry; c2 = (r < s);
	#           carry = c1 + c2.
377	addu	$8,$12
378	sltu	$24,$8,$12
379	addu	$12,$8,$2
380	sltu	$2,$12,$8
381	sw	$12,-4*4($4)
382	addu	$2,$24
383
384	addu	$9,$13
385	sltu	$25,$9,$13
386	addu	$13,$9,$2
387	sltu	$2,$13,$9
388	sw	$13,-3*4($4)
389	addu	$2,$25
390
391	addu	$10,$14
392	sltu	$24,$10,$14
393	addu	$14,$10,$2
394	sltu	$2,$14,$10
395	sw	$14,-2*4($4)
396	addu	$2,$24
397
398	addu	$11,$15
399	sltu	$25,$11,$15
400	addu	$15,$11,$2
401	sltu	$2,$15,$11
402	sw	$15,-4($4)
403	addu	$2,$25
404
405	.set	noreorder
406	bgtzl	$1,.L_bn_add_words_loop
407	lw	$12,0($5)	# delay slot of a branch-likely: runs only when looping
408
409	beqz	$7,.L_bn_add_words_return
410	nop
411
	# Tail: one to three leftover words, one at a time.
412.L_bn_add_words_tail:
413	.set	reorder
414	lw	$12,0($5)
415	lw	$8,0($6)
416	addu	$8,$12
417	subu	$7,1
418	sltu	$24,$8,$12
419	addu	$12,$8,$2
420	sltu	$2,$12,$8
421	sw	$12,0($4)
422	addu	$2,$24
423	beqz	$7,.L_bn_add_words_return
424
425	lw	$13,4($5)
426	lw	$9,4($6)
427	addu	$9,$13
428	subu	$7,1
429	sltu	$25,$9,$13
430	addu	$13,$9,$2
431	sltu	$2,$13,$9
432	sw	$13,4($4)
433	addu	$2,$25
434	beqz	$7,.L_bn_add_words_return
435
	# Third leftover word is the last possible one, so $7 is not updated.
436	lw	$14,2*4($5)
437	lw	$10,2*4($6)
438	addu	$10,$14
439	sltu	$24,$10,$14
440	addu	$14,$10,$2
441	sltu	$2,$14,$10
442	sw	$14,2*4($4)
443	addu	$2,$24
444
445.L_bn_add_words_return:
446	.set	noreorder
447	jr	$31
448	move	$4,$2	# delay slot: copy the carry into $4 as well
449
450.end	bn_add_words_internal
451
# bn_sub_words -- exported entry point.
# Register use, as seen in bn_sub_words_internal:
#   $4 = destination words, $5 = minuend words,
#   $6 = subtrahend words, $7 = signed word count
# A count <= 0 returns at once with $2 = 0; otherwise control transfers to
# bn_sub_words_internal with the borrow register $2 already cleared.
452.align	5
453.globl	bn_sub_words
454.ent	bn_sub_words
455bn_sub_words:
456	.set	noreorder
457	bgtz	$7,bn_sub_words_internal
458	move	$2,$0	# delay slot, runs on both paths: borrow = 0
459	jr	$31
460	move	$4,$0	# delay slot; sibling stubs copy $2 here, same value (0)
461.end	bn_sub_words
462
# bn_sub_words_internal -- body of bn_sub_words, entered with $7 > 0 and
# $2 = 0.  Word-wise subtraction with borrow: each destination word receives
# (word at $5) - (word at $6) - borrow-in.
# Returns the final borrow in $2 (copied to $4 in the return delay slot).
# Advances $4, $5 and $6, counts $7 down.
# Scratch: $1 ($at), $3 (constant -4), $8-$15, $24, $25.
463.align	5
464.ent	bn_sub_words_internal
465bn_sub_words_internal:
466	.set	reorder
467	li	$3,-4
468	and	$1,$7,$3	# $1 = count rounded down to a multiple of four
469	lw	$12,0($5)
470	beqz	$1,.L_bn_sub_words_tail
471
	# Main loop: four words per pass.  Loads are interleaved with the
	# pointer bumps, so offsets are positive before a bump and negative
	# after it.  $12-$15 hold the $5-side words, $8-$11 the $6-side words.
472.L_bn_sub_words_loop:
473	lw	$8,0($6)
474	subu	$7,4
475	lw	$13,4($5)
476	and	$1,$7,$3	# are four or more words left after this pass?
477	lw	$14,2*4($5)
478	addu $6,4*4
479	lw	$15,3*4($5)
480	addu $4,4*4
481	lw	$9,-3*4($6)
482	addu $5,4*4
483	lw	$10,-2*4($6)
484	lw	$11,-4($6)
	# Per word: b1 = (a < b); d = a - b; r = d - borrow; b2 = (r > d);
	#           borrow = b1 + b2.
485	sltu	$24,$12,$8
486	subu	$8,$12,$8
487	subu	$12,$8,$2
488	sgtu	$2,$12,$8
489	sw	$12,-4*4($4)
490	addu	$2,$24
491
492	sltu	$25,$13,$9
493	subu	$9,$13,$9
494	subu	$13,$9,$2
495	sgtu	$2,$13,$9
496	sw	$13,-3*4($4)
497	addu	$2,$25
498
499
500	sltu	$24,$14,$10
501	subu	$10,$14,$10
502	subu	$14,$10,$2
503	sgtu	$2,$14,$10
504	sw	$14,-2*4($4)
505	addu	$2,$24
506
507	sltu	$25,$15,$11
508	subu	$11,$15,$11
509	subu	$15,$11,$2
510	sgtu	$2,$15,$11
511	sw	$15,-4($4)
512	addu	$2,$25
513
514	.set	noreorder
515	bgtzl	$1,.L_bn_sub_words_loop
516	lw	$12,0($5)	# delay slot of a branch-likely: runs only when looping
517
518	beqz	$7,.L_bn_sub_words_return
519	nop
520
	# Tail: one to three leftover words, one at a time.
521.L_bn_sub_words_tail:
522	.set	reorder
523	lw	$12,0($5)
524	lw	$8,0($6)
525	subu	$7,1
526	sltu	$24,$12,$8
527	subu	$8,$12,$8
528	subu	$12,$8,$2
529	sgtu	$2,$12,$8
530	sw	$12,0($4)
531	addu	$2,$24
532	beqz	$7,.L_bn_sub_words_return
533
534	lw	$13,4($5)
535	subu	$7,1
536	lw	$9,4($6)
537	sltu	$25,$13,$9
538	subu	$9,$13,$9
539	subu	$13,$9,$2
540	sgtu	$2,$13,$9
541	sw	$13,4($4)
542	addu	$2,$25
543	beqz	$7,.L_bn_sub_words_return
544
	# Third leftover word is the last possible one, so $7 is not updated.
545	lw	$14,2*4($5)
546	lw	$10,2*4($6)
547	sltu	$24,$14,$10
548	subu	$10,$14,$10
549	subu	$14,$10,$2
550	sgtu	$2,$14,$10
551	sw	$14,2*4($4)
552	addu	$2,$24
553
554.L_bn_sub_words_return:
555	.set	noreorder
556	jr	$31
557	move	$4,$2	# delay slot: copy the borrow into $4 as well
558.end	bn_sub_words_internal
559
# bn_div_3_words -- exported entry point.
# On entry $4 points at a word of the dividend (the words at offsets 0, -4
# and -8 from it are read), $5 and $6 are two divisor words; the code that
# follows treats $6 as the divisor handed to bn_div_words and $5 as the
# word used to refine the quotient.
# When the word at 0($4) equals $6 the routine returns -1 (all ones) without
# dividing; otherwise it continues in bn_div_3_words_internal with
#   $4 = word at offset 0, $5 = word at offset -4,
#   $7 = the original pointer, $10 = the original $5.
560.align 5
561.globl	bn_div_3_words
562.ent	bn_div_3_words
563bn_div_3_words:
564	.set	noreorder
565	move	$7,$4		# we know that bn_div_words does not
566				# touch $7, $10, $11 and preserves $6
567				# so that we can save two arguments
568				# and return address in registers
569				# instead of stack:-)
570
571	lw	$4,($7)
572	move	$10,$5
573	bne	$4,$6,bn_div_3_words_internal
574	lw	$5,-4($7)	# delay slot, runs on both paths
575	li	$2,-1
576	jr	$31
577	move	$4,$2	# delay slot: copy the result into $4 as well
578.end	bn_div_3_words
579
# bn_div_3_words_internal -- continuation of bn_div_3_words.
# Calls bn_div_words on ($4,$5,$6) to get a quotient estimate in $2 and a
# remainder in $5, then lowers the estimate while the double-word product
# $10 * quotient is too large compared with (remainder : word at -8($7)).
# Returns the quotient in $2 (copied to $4 in the return delay slot).
# The return address is parked in $11 around the call instead of on a stack.
# NOTE(review): the call goes through the exported bn_div_words symbol and
# so passes its zero-divisor check -- confirm that is intended.
580.align	5
581.ent	bn_div_3_words_internal
582bn_div_3_words_internal:
583	.set	reorder
584	move	$11,$31
585	bal	bn_div_words
586	move	$31,$11
587	multu	$10,$2
588	lw	$14,-2*4($7)
589	move	$8,$0
590	mfhi	$13
591	mflo	$12
592	sltu	$24,$13,$5	# product high word below the remainder: done
593.L_bn_div_3_words_inner_loop:
594	bnez	$24,.L_bn_div_3_words_inner_loop_done
595	sgeu	$1,$14,$12
596	seq	$25,$13,$5
597	and	$1,$25		# $1 = high words equal and low word fits: stop
598	sltu	$15,$12,$10	# borrow out of the low-word subtraction below
599	addu	$5,$6		# remainder += $6
600	subu	$13,$15
601	subu	$12,$10		# product -= $10
602	sltu	$24,$13,$5
603	sltu	$8,$5,$6	# the remainder addition wrapped around
604	or	$24,$8
605	.set	noreorder
606	beqzl	$1,.L_bn_div_3_words_inner_loop
607	subu	$2,1		# delay slot of a branch-likely: quotient-- only when looping
608	.set	reorder
609.L_bn_div_3_words_inner_loop_done:
610	.set	noreorder
611	jr	$31
612	move	$4,$2	# delay slot: copy the quotient into $4 as well
613.end	bn_div_3_words_internal
614
# bn_div_words -- exported entry point.
# $4:$5 = high:low words of the dividend, $6 = divisor.
# A zero divisor returns -1 (all ones) in $2 and $4; otherwise control
# transfers to bn_div_words_internal (with $2 = -1, which it overwrites).
615.align	5
616.globl	bn_div_words
617.ent	bn_div_words
618bn_div_words:
619	.set	noreorder
620	bnez	$6,bn_div_words_internal
621	li	$2,-1		# I would rather signal div-by-zero
622				# which can be done with 'break 7'
623	jr	$31
624	move	$4,$2	# delay slot: copy the result into $4 as well
625.end	bn_div_words
626
# bn_div_words_internal -- body of bn_div_words, entered with $6 != 0 and
# still under the .set noreorder left active by bn_div_words.
# Divides the double word $4:$5 by $6 in two half-word (16-bit) steps.
# Returns the quotient in $2 (also $4) and the remainder in $3 (also $5);
# $6 is restored to its entry value.  Executes 'break 6' when the dividend
# bits that normalisation would shift out of $4 are not all zero.
# Scratch: $1 ($at), $8, $9, $12-$15, $24, $25.  $7, $10 and $11 are not
# touched, which bn_div_3_words relies on.
627.align	5
628.ent	bn_div_words_internal
629bn_div_words_internal:
	# Normalise: shift the divisor left until its top bit is set,
	# counting the shifts in $25.
630	move	$3,$0
631	bltz	$6,.L_bn_div_words_body
632	move	$25,$3	# delay slot: shift count = 0
633	sll	$6,1
634	bgtz	$6,.-4	# back to the sll above while the top bit is clear
635	addu	$25,1	# delay slot: runs on every pass, counts the shift
636
637	.set	reorder
	# Shift the dividend by the same amount.  $13 holds the negated
	# count; the variable shifts below depend on that acting as
	# (word size - count) -- see the MIPS sllv/srlv definition.
638	negu	$13,$25
639	li	$14,-1
640	sll	$14,$13
641	and	$14,$4	# high-word bits that the shift would push out
642	srl	$1,$5,$13
643	.set	noreorder
644	bnezl	$14,.+8
645	break	6		# signal overflow
646	.set	reorder
647	sll	$4,$25
648	sll	$5,$25
649	or	$4,$1
650.L_bn_div_words_body:
651	srl	$3,$6,4*4	# bits
652	sgeu	$1,$4,$6
653	.set	noreorder
654	bnezl	$1,.+8
655	subu	$4,$6	# delay slot of a branch-likely: only when $4 >= $6
656	.set	reorder
657
	# First quotient half-word: estimate with the divisor's upper half,
	# then correct the estimate downwards.
658	li	$8,-1
659	srl	$9,$4,4*4	# bits
660	srl	$8,4*4	# q=0xffff
661	beq	$3,$9,.L_bn_div_words_skip_div1
662	divu	$0,$4,$3
663	mflo	$8
664.L_bn_div_words_skip_div1:
665	multu	$6,$8
666	sll	$15,$4,4*4	# bits
667	srl	$1,$5,4*4	# bits
668	or	$15,$1
669	mflo	$12
670	mfhi	$13
	# While the product $13:$12 exceeds $9:$15, subtract the divisor
	# from the product and decrement q.
671.L_bn_div_words_inner_loop1:
672	sltu	$14,$15,$12
673	seq	$24,$9,$13
674	sltu	$1,$9,$13
675	and	$14,$24
676	sltu	$2,$12,$6
677	or	$1,$14
678	.set	noreorder
679	beqz	$1,.L_bn_div_words_inner_loop1_done
680	subu	$13,$2	# delay slot: also runs on exit, where $13 is dead
681	subu	$12,$6
682	b	.L_bn_div_words_inner_loop1
683	subu	$8,1
684	.set	reorder
685.L_bn_div_words_inner_loop1_done:
686
687	sll	$5,4*4	# bits
688	subu	$4,$15,$12
689	sll	$2,$8,4*4	# bits
690
	# Second quotient half-word: same procedure on the new partial
	# remainder.  $3 doubles as the borrow temporary in this loop.
691	li	$8,-1
692	srl	$9,$4,4*4	# bits
693	srl	$8,4*4	# q=0xffff
694	beq	$3,$9,.L_bn_div_words_skip_div2
695	divu	$0,$4,$3
696	mflo	$8
697.L_bn_div_words_skip_div2:
698	multu	$6,$8
699	sll	$15,$4,4*4	# bits
700	srl	$1,$5,4*4	# bits
701	or	$15,$1
702	mflo	$12
703	mfhi	$13
704.L_bn_div_words_inner_loop2:
705	sltu	$14,$15,$12
706	seq	$24,$9,$13
707	sltu	$1,$9,$13
708	and	$14,$24
709	sltu	$3,$12,$6
710	or	$1,$14
711	.set	noreorder
712	beqz	$1,.L_bn_div_words_inner_loop2_done
713	subu	$13,$3	# delay slot: also runs on exit, where $13 is dead
714	subu	$12,$6
715	b	.L_bn_div_words_inner_loop2
716	subu	$8,1
717	.set	reorder
718.L_bn_div_words_inner_loop2_done:
719
720	subu	$4,$15,$12
721	or	$2,$8
722	srl	$3,$4,$25	# $3 contains remainder if anybody wants it
723	srl	$6,$25		# restore $6
724
725	.set	noreorder
726	move	$5,$3
727	jr	$31
728	move	$4,$2	# delay slot: copy the quotient into $4 as well
729.end	bn_div_words_internal
730
# bn_mul_comba8 -- fully unrolled multiplication of two eight-word operands.
#   $4 = r (sixteen result words are stored), $5 = a, $6 = b.
# Register map after the loads:
#   a[0..7] -> $12 $13 $14 $15 $16 $18 $20 $5   ($5 replaces the pointer)
#   b[0..7] -> $8  $9  $10 $11 $17 $19 $21 $6   ($6 replaces the pointer)
# $2, $3 and $7 rotate through the roles c1/c2/c3 named in the mul_add_c
# comments; $24/$25 receive the low/high halves of each product and $1 ($at)
# holds carries.  Each multu is issued before the previous product has been
# fully folded in.
# Callee-saved $16-$21 are kept in a 24-byte stack frame.
731.align	5
732.globl	bn_mul_comba8
733.ent	bn_mul_comba8
734bn_mul_comba8:
735	.set	noreorder
736	.frame	$29,6*4,$31
737	.mask	0x003f0000,-4
738	subu $29,6*4
739	sw	$21,5*4($29)
740	sw	$20,4*4($29)
741	sw	$19,3*4($29)
742	sw	$18,2*4($29)
743	sw	$17,1*4($29)
744	sw	$16,0*4($29)
745
746	.set	reorder
747	lw	$12,0($5)	# If compiled with -mips3 option on
748				# R5000 box assembler barks on this
749				# line with "should not have mult/div
750				# as last instruction in bb (R10K
751				# bug)" warning. If anybody out there
752				# has a clue about how to circumvent
753				# this do send me a note.
754				#		<appro@fy.chalmers.se>
755
756	lw	$8,0($6)
757	lw	$13,4($5)
758	lw	$14,2*4($5)
759	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
760	lw	$15,3*4($5)
761	lw	$9,4($6)
762	lw	$10,2*4($6)
763	lw	$11,3*4($6)
764	mflo	$2
765	mfhi	$3
766
767	lw	$16,4*4($5)
768	lw	$18,5*4($5)
769	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
770	lw	$20,6*4($5)
771	lw	$5,7*4($5)
772	lw	$17,4*4($6)
773	lw	$19,5*4($6)
774	mflo	$24
775	mfhi	$25
776	addu	$3,$24
777	sltu	$1,$3,$24
778	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
779	addu	$7,$25,$1
780	lw	$21,6*4($6)
781	lw	$6,7*4($6)
782	sw	$2,0($4)	# r[0]=c1;
783	mflo	$24
784	mfhi	$25
785	addu	$3,$24
786	sltu	$1,$3,$24
787	 multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
788	addu	$25,$1
789	addu	$7,$25
790	sltu	$2,$7,$25
791	sw	$3,4($4)	# r[1]=c2;
792
793	mflo	$24
794	mfhi	$25
795	addu	$7,$24
796	sltu	$1,$7,$24
797	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
798	addu	$25,$1
799	addu	$2,$25
800	mflo	$24
801	mfhi	$25
802	addu	$7,$24
803	sltu	$1,$7,$24
804	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
805	addu	$25,$1
806	addu	$2,$25
807	sltu	$3,$2,$25
808	mflo	$24
809	mfhi	$25
810	addu	$7,$24
811	sltu	$1,$7,$24
812	 multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
813	addu	$25,$1
814	addu	$2,$25
815	sltu	$1,$2,$25
816	addu	$3,$1
817	sw	$7,2*4($4)	# r[2]=c3;
818
819	mflo	$24
820	mfhi	$25
821	addu	$2,$24
822	sltu	$1,$2,$24
823	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
824	addu	$25,$1
825	addu	$3,$25
826	sltu	$7,$3,$25
827	mflo	$24
828	mfhi	$25
829	addu	$2,$24
830	sltu	$1,$2,$24
831	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
832	addu	$25,$1
833	addu	$3,$25
834	sltu	$1,$3,$25
835	addu	$7,$1
836	mflo	$24
837	mfhi	$25
838	addu	$2,$24
839	sltu	$1,$2,$24
840	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
841	addu	$25,$1
842	addu	$3,$25
843	sltu	$1,$3,$25
844	addu	$7,$1
845	mflo	$24
846	mfhi	$25
847	addu	$2,$24
848	sltu	$1,$2,$24
849	 multu	$16,$8		# mul_add_c(a[4],b[0],c2,c3,c1);
850	addu	$25,$1
851	addu	$3,$25
852	sltu	$1,$3,$25
853	addu	$7,$1
854	sw	$2,3*4($4)	# r[3]=c1;
855
856	mflo	$24
857	mfhi	$25
858	addu	$3,$24
859	sltu	$1,$3,$24
860	multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
861	addu	$25,$1
862	addu	$7,$25
863	sltu	$2,$7,$25
864	mflo	$24
865	mfhi	$25
866	addu	$3,$24
867	sltu	$1,$3,$24
868	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
869	addu	$25,$1
870	addu	$7,$25
871	sltu	$1,$7,$25
872	addu	$2,$1
873	mflo	$24
874	mfhi	$25
875	addu	$3,$24
876	sltu	$1,$3,$24
877	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
878	addu	$25,$1
879	addu	$7,$25
880	sltu	$1,$7,$25
881	addu	$2,$1
882	mflo	$24
883	mfhi	$25
884	addu	$3,$24
885	sltu	$1,$3,$24
886	multu	$12,$17		# mul_add_c(a[0],b[4],c2,c3,c1);
887	addu	$25,$1
888	addu	$7,$25
889	sltu	$1,$7,$25
890	addu	$2,$1
891	mflo	$24
892	mfhi	$25
893	addu	$3,$24
894	sltu	$1,$3,$24
895	 multu	$12,$19		# mul_add_c(a[0],b[5],c3,c1,c2);
896	addu	$25,$1
897	addu	$7,$25
898	sltu	$1,$7,$25
899	addu	$2,$1
900	sw	$3,4*4($4)	# r[4]=c2;
901
902	mflo	$24
903	mfhi	$25
904	addu	$7,$24
905	sltu	$1,$7,$24
906	multu	$13,$17		# mul_add_c(a[1],b[4],c3,c1,c2);
907	addu	$25,$1
908	addu	$2,$25
909	sltu	$3,$2,$25
910	mflo	$24
911	mfhi	$25
912	addu	$7,$24
913	sltu	$1,$7,$24
914	multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
915	addu	$25,$1
916	addu	$2,$25
917	sltu	$1,$2,$25
918	addu	$3,$1
919	mflo	$24
920	mfhi	$25
921	addu	$7,$24
922	sltu	$1,$7,$24
923	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
924	addu	$25,$1
925	addu	$2,$25
926	sltu	$1,$2,$25
927	addu	$3,$1
928	mflo	$24
929	mfhi	$25
930	addu	$7,$24
931	sltu	$1,$7,$24
932	multu	$16,$9		# mul_add_c(a[4],b[1],c3,c1,c2);
933	addu	$25,$1
934	addu	$2,$25
935	sltu	$1,$2,$25
936	addu	$3,$1
937	mflo	$24
938	mfhi	$25
939	addu	$7,$24
940	sltu	$1,$7,$24
941	multu	$18,$8		# mul_add_c(a[5],b[0],c3,c1,c2);
942	addu	$25,$1
943	addu	$2,$25
944	sltu	$1,$2,$25
945	addu	$3,$1
946	mflo	$24
947	mfhi	$25
948	addu	$7,$24
949	sltu	$1,$7,$24
950	 multu	$20,$8		# mul_add_c(a[6],b[0],c1,c2,c3);
951	addu	$25,$1
952	addu	$2,$25
953	sltu	$1,$2,$25
954	addu	$3,$1
955	sw	$7,5*4($4)	# r[5]=c3;
956
957	mflo	$24
958	mfhi	$25
959	addu	$2,$24
960	sltu	$1,$2,$24
961	multu	$18,$9		# mul_add_c(a[5],b[1],c1,c2,c3);
962	addu	$25,$1
963	addu	$3,$25
964	sltu	$7,$3,$25
965	mflo	$24
966	mfhi	$25
967	addu	$2,$24
968	sltu	$1,$2,$24
969	multu	$16,$10		# mul_add_c(a[4],b[2],c1,c2,c3);
970	addu	$25,$1
971	addu	$3,$25
972	sltu	$1,$3,$25
973	addu	$7,$1
974	mflo	$24
975	mfhi	$25
976	addu	$2,$24
977	sltu	$1,$2,$24
978	multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
979	addu	$25,$1
980	addu	$3,$25
981	sltu	$1,$3,$25
982	addu	$7,$1
983	mflo	$24
984	mfhi	$25
985	addu	$2,$24
986	sltu	$1,$2,$24
987	multu	$14,$17		# mul_add_c(a[2],b[4],c1,c2,c3);
988	addu	$25,$1
989	addu	$3,$25
990	sltu	$1,$3,$25
991	addu	$7,$1
992	mflo	$24
993	mfhi	$25
994	addu	$2,$24
995	sltu	$1,$2,$24
996	multu	$13,$19		# mul_add_c(a[1],b[5],c1,c2,c3);
997	addu	$25,$1
998	addu	$3,$25
999	sltu	$1,$3,$25
1000	addu	$7,$1
1001	mflo	$24
1002	mfhi	$25
1003	addu	$2,$24
1004	sltu	$1,$2,$24
1005	multu	$12,$21		# mul_add_c(a[0],b[6],c1,c2,c3);
1006	addu	$25,$1
1007	addu	$3,$25
1008	sltu	$1,$3,$25
1009	addu	$7,$1
1010	mflo	$24
1011	mfhi	$25
1012	addu	$2,$24
1013	sltu	$1,$2,$24
1014	 multu	$12,$6		# mul_add_c(a[0],b[7],c2,c3,c1);
1015	addu	$25,$1
1016	addu	$3,$25
1017	sltu	$1,$3,$25
1018	addu	$7,$1
1019	sw	$2,6*4($4)	# r[6]=c1;
1020
1021	mflo	$24
1022	mfhi	$25
1023	addu	$3,$24
1024	sltu	$1,$3,$24
1025	multu	$13,$21		# mul_add_c(a[1],b[6],c2,c3,c1);
1026	addu	$25,$1
1027	addu	$7,$25
1028	sltu	$2,$7,$25
1029	mflo	$24
1030	mfhi	$25
1031	addu	$3,$24
1032	sltu	$1,$3,$24
1033	multu	$14,$19		# mul_add_c(a[2],b[5],c2,c3,c1);
1034	addu	$25,$1
1035	addu	$7,$25
1036	sltu	$1,$7,$25
1037	addu	$2,$1
1038	mflo	$24
1039	mfhi	$25
1040	addu	$3,$24
1041	sltu	$1,$3,$24
1042	multu	$15,$17		# mul_add_c(a[3],b[4],c2,c3,c1);
1043	addu	$25,$1
1044	addu	$7,$25
1045	sltu	$1,$7,$25
1046	addu	$2,$1
1047	mflo	$24
1048	mfhi	$25
1049	addu	$3,$24
1050	sltu	$1,$3,$24
1051	multu	$16,$11		# mul_add_c(a[4],b[3],c2,c3,c1);
1052	addu	$25,$1
1053	addu	$7,$25
1054	sltu	$1,$7,$25
1055	addu	$2,$1
1056	mflo	$24
1057	mfhi	$25
1058	addu	$3,$24
1059	sltu	$1,$3,$24
1060	multu	$18,$10		# mul_add_c(a[5],b[2],c2,c3,c1);
1061	addu	$25,$1
1062	addu	$7,$25
1063	sltu	$1,$7,$25
1064	addu	$2,$1
1065	mflo	$24
1066	mfhi	$25
1067	addu	$3,$24
1068	sltu	$1,$3,$24
1069	multu	$20,$9		# mul_add_c(a[6],b[1],c2,c3,c1);
1070	addu	$25,$1
1071	addu	$7,$25
1072	sltu	$1,$7,$25
1073	addu	$2,$1
1074	mflo	$24
1075	mfhi	$25
1076	addu	$3,$24
1077	sltu	$1,$3,$24
1078	multu	$5,$8		# mul_add_c(a[7],b[0],c2,c3,c1);
1079	addu	$25,$1
1080	addu	$7,$25
1081	sltu	$1,$7,$25
1082	addu	$2,$1
1083	mflo	$24
1084	mfhi	$25
1085	addu	$3,$24
1086	sltu	$1,$3,$24
1087	 multu	$5,$9		# mul_add_c(a[7],b[1],c3,c1,c2);
1088	addu	$25,$1
1089	addu	$7,$25
1090	sltu	$1,$7,$25
1091	addu	$2,$1
1092	sw	$3,7*4($4)	# r[7]=c2;
1093
1094	mflo	$24
1095	mfhi	$25
1096	addu	$7,$24
1097	sltu	$1,$7,$24
1098	multu	$20,$10		# mul_add_c(a[6],b[2],c3,c1,c2);
1099	addu	$25,$1
1100	addu	$2,$25
1101	sltu	$3,$2,$25
1102	mflo	$24
1103	mfhi	$25
1104	addu	$7,$24
1105	sltu	$1,$7,$24
1106	multu	$18,$11		# mul_add_c(a[5],b[3],c3,c1,c2);
1107	addu	$25,$1
1108	addu	$2,$25
1109	sltu	$1,$2,$25
1110	addu	$3,$1
1111	mflo	$24
1112	mfhi	$25
1113	addu	$7,$24
1114	sltu	$1,$7,$24
1115	multu	$16,$17		# mul_add_c(a[4],b[4],c3,c1,c2);
1116	addu	$25,$1
1117	addu	$2,$25
1118	sltu	$1,$2,$25
1119	addu	$3,$1
1120	mflo	$24
1121	mfhi	$25
1122	addu	$7,$24
1123	sltu	$1,$7,$24
1124	multu	$15,$19		# mul_add_c(a[3],b[5],c3,c1,c2);
1125	addu	$25,$1
1126	addu	$2,$25
1127	sltu	$1,$2,$25
1128	addu	$3,$1
1129	mflo	$24
1130	mfhi	$25
1131	addu	$7,$24
1132	sltu	$1,$7,$24
1133	multu	$14,$21		# mul_add_c(a[2],b[6],c3,c1,c2);
1134	addu	$25,$1
1135	addu	$2,$25
1136	sltu	$1,$2,$25
1137	addu	$3,$1
1138	mflo	$24
1139	mfhi	$25
1140	addu	$7,$24
1141	sltu	$1,$7,$24
1142	multu	$13,$6		# mul_add_c(a[1],b[7],c3,c1,c2);
1143	addu	$25,$1
1144	addu	$2,$25
1145	sltu	$1,$2,$25
1146	addu	$3,$1
1147	mflo	$24
1148	mfhi	$25
1149	addu	$7,$24
1150	sltu	$1,$7,$24
1151	 multu	$14,$6		# mul_add_c(a[2],b[7],c1,c2,c3);
1152	addu	$25,$1
1153	addu	$2,$25
1154	sltu	$1,$2,$25
1155	addu	$3,$1
1156	sw	$7,8*4($4)	# r[8]=c3;
1157
1158	mflo	$24
1159	mfhi	$25
1160	addu	$2,$24
1161	sltu	$1,$2,$24
1162	multu	$15,$21		# mul_add_c(a[3],b[6],c1,c2,c3);
1163	addu	$25,$1
1164	addu	$3,$25
1165	sltu	$7,$3,$25
1166	mflo	$24
1167	mfhi	$25
1168	addu	$2,$24
1169	sltu	$1,$2,$24
1170	multu	$16,$19		# mul_add_c(a[4],b[5],c1,c2,c3);
1171	addu	$25,$1
1172	addu	$3,$25
1173	sltu	$1,$3,$25
1174	addu	$7,$1
1175	mflo	$24
1176	mfhi	$25
1177	addu	$2,$24
1178	sltu	$1,$2,$24
1179	multu	$18,$17		# mul_add_c(a[5],b[4],c1,c2,c3);
1180	addu	$25,$1
1181	addu	$3,$25
1182	sltu	$1,$3,$25
1183	addu	$7,$1
1184	mflo	$24
1185	mfhi	$25
1186	addu	$2,$24
1187	sltu	$1,$2,$24
1188	multu	$20,$11		# mul_add_c(a[6],b[3],c1,c2,c3);
1189	addu	$25,$1
1190	addu	$3,$25
1191	sltu	$1,$3,$25
1192	addu	$7,$1
1193	mflo	$24
1194	mfhi	$25
1195	addu	$2,$24
1196	sltu	$1,$2,$24
1197	multu	$5,$10		# mul_add_c(a[7],b[2],c1,c2,c3);
1198	addu	$25,$1
1199	addu	$3,$25
1200	sltu	$1,$3,$25
1201	addu	$7,$1
1202	mflo	$24
1203	mfhi	$25
1204	addu	$2,$24
1205	sltu	$1,$2,$24
1206	 multu	$5,$11		# mul_add_c(a[7],b[3],c2,c3,c1);
1207	addu	$25,$1
1208	addu	$3,$25
1209	sltu	$1,$3,$25
1210	addu	$7,$1
1211	sw	$2,9*4($4)	# r[9]=c1;
1212
1213	mflo	$24
1214	mfhi	$25
1215	addu	$3,$24
1216	sltu	$1,$3,$24
1217	multu	$20,$17		# mul_add_c(a[6],b[4],c2,c3,c1);
1218	addu	$25,$1
1219	addu	$7,$25
1220	sltu	$2,$7,$25
1221	mflo	$24
1222	mfhi	$25
1223	addu	$3,$24
1224	sltu	$1,$3,$24
1225	multu	$18,$19		# mul_add_c(a[5],b[5],c2,c3,c1);
1226	addu	$25,$1
1227	addu	$7,$25
1228	sltu	$1,$7,$25
1229	addu	$2,$1
1230	mflo	$24
1231	mfhi	$25
1232	addu	$3,$24
1233	sltu	$1,$3,$24
1234	multu	$16,$21		# mul_add_c(a[4],b[6],c2,c3,c1);
1235	addu	$25,$1
1236	addu	$7,$25
1237	sltu	$1,$7,$25
1238	addu	$2,$1
1239	mflo	$24
1240	mfhi	$25
1241	addu	$3,$24
1242	sltu	$1,$3,$24
1243	multu	$15,$6		# mul_add_c(a[3],b[7],c2,c3,c1);
1244	addu	$25,$1
1245	addu	$7,$25
1246	sltu	$1,$7,$25
1247	addu	$2,$1
1248	mflo	$24
1249	mfhi	$25
1250	addu	$3,$24
1251	sltu	$1,$3,$24
1252	multu	$16,$6		# mul_add_c(a[4],b[7],c3,c1,c2);
1253	addu	$25,$1
1254	addu	$7,$25
1255	sltu	$1,$7,$25
1256	addu	$2,$1
1257	sw	$3,10*4($4)	# r[10]=c2;
1258
1259	mflo	$24
1260	mfhi	$25
1261	addu	$7,$24
1262	sltu	$1,$7,$24
1263	multu	$18,$21		# mul_add_c(a[5],b[6],c3,c1,c2);
1264	addu	$25,$1
1265	addu	$2,$25
1266	sltu	$3,$2,$25
1267	mflo	$24
1268	mfhi	$25
1269	addu	$7,$24
1270	sltu	$1,$7,$24
1271	multu	$20,$19		# mul_add_c(a[6],b[5],c3,c1,c2);
1272	addu	$25,$1
1273	addu	$2,$25
1274	sltu	$1,$2,$25
1275	addu	$3,$1
1276	mflo	$24
1277	mfhi	$25
1278	addu	$7,$24
1279	sltu	$1,$7,$24
1280	multu	$5,$17		# mul_add_c(a[7],b[4],c3,c1,c2);
1281	addu	$25,$1
1282	addu	$2,$25
1283	sltu	$1,$2,$25
1284	addu	$3,$1
1285	mflo	$24
1286	mfhi	$25
1287	addu	$7,$24
1288	sltu	$1,$7,$24
1289	 multu	$5,$19		# mul_add_c(a[7],b[5],c1,c2,c3);
1290	addu	$25,$1
1291	addu	$2,$25
1292	sltu	$1,$2,$25
1293	addu	$3,$1
1294	sw	$7,11*4($4)	# r[11]=c3;
1295
1296	mflo	$24
1297	mfhi	$25
1298	addu	$2,$24
1299	sltu	$1,$2,$24
1300	multu	$20,$21		# mul_add_c(a[6],b[6],c1,c2,c3);
1301	addu	$25,$1
1302	addu	$3,$25
1303	sltu	$7,$3,$25
1304	mflo	$24
1305	mfhi	$25
1306	addu	$2,$24
1307	sltu	$1,$2,$24
1308	multu	$18,$6		# mul_add_c(a[5],b[7],c1,c2,c3);
1309	addu	$25,$1
1310	addu	$3,$25
1311	sltu	$1,$3,$25
1312	addu	$7,$1
1313	mflo	$24
1314	mfhi	$25
1315	addu	$2,$24
1316	sltu	$1,$2,$24
1317	 multu	$20,$6		# mul_add_c(a[6],b[7],c2,c3,c1);
1318	addu	$25,$1
1319	addu	$3,$25
1320	sltu	$1,$3,$25
1321	addu	$7,$1
1322	sw	$2,12*4($4)	# r[12]=c1;
1323
1324	mflo	$24
1325	mfhi	$25
1326	addu	$3,$24
1327	sltu	$1,$3,$24
1328	multu	$5,$21		# mul_add_c(a[7],b[6],c2,c3,c1);
1329	addu	$25,$1
1330	addu	$7,$25
1331	sltu	$2,$7,$25
1332	mflo	$24
1333	mfhi	$25
1334	addu	$3,$24
1335	sltu	$1,$3,$24
1336	multu	$5,$6		# mul_add_c(a[7],b[7],c3,c1,c2);
1337	addu	$25,$1
1338	addu	$7,$25
1339	sltu	$1,$7,$25
1340	addu	$2,$1
1341	sw	$3,13*4($4)	# r[13]=c2;
1342
1343	mflo	$24
1344	mfhi	$25
1345	addu	$7,$24
1346	sltu	$1,$7,$24
1347	addu	$25,$1
1348	addu	$2,$25
1349	sw	$7,14*4($4)	# r[14]=c3;
1350	sw	$2,15*4($4)	# r[15]=c1;
1351
	# Epilogue: restore $16-$21; the frame is released in the jr delay slot.
1352	.set	noreorder
1353	lw	$21,5*4($29)
1354	lw	$20,4*4($29)
1355	lw	$19,3*4($29)
1356	lw	$18,2*4($29)
1357	lw	$17,1*4($29)
1358	lw	$16,0*4($29)
1359	jr	$31
1360	addu $29,6*4
1361.end	bn_mul_comba8
1362
# bn_mul_comba4 -- fully unrolled multiplication of two four-word operands.
#   $4 = r (eight result words are stored), $5 = a, $6 = b.
# Register map after the loads:
#   a[0..3] -> $12 $13 $14 $15
#   b[0..3] -> $8  $9  $10 $11
# $2, $3 and $7 rotate through the roles c1/c2/c3 named in the mul_add_c
# comments; $24/$25 receive the low/high halves of each product and $1 ($at)
# holds carries.  No stack frame: only $1-$3, $7-$15, $24 and $25 are written.
1363.align	5
1364.globl	bn_mul_comba4
1365.ent	bn_mul_comba4
1366bn_mul_comba4:
1367	.set	reorder
1368	lw	$12,0($5)
1369	lw	$8,0($6)
1370	lw	$13,4($5)
1371	lw	$14,2*4($5)
1372	multu	$12,$8		# mul_add_c(a[0],b[0],c1,c2,c3);
1373	lw	$15,3*4($5)
1374	lw	$9,4($6)
1375	lw	$10,2*4($6)
1376	lw	$11,3*4($6)
1377	mflo	$2
1378	mfhi	$3
1379	sw	$2,0($4)	# r[0]=c1;
1380
1381	multu	$12,$9		# mul_add_c(a[0],b[1],c2,c3,c1);
1382	mflo	$24
1383	mfhi	$25
1384	addu	$3,$24
1385	sltu	$1,$3,$24
1386	multu	$13,$8		# mul_add_c(a[1],b[0],c2,c3,c1);
1387	addu	$7,$25,$1
1388	mflo	$24
1389	mfhi	$25
1390	addu	$3,$24
1391	sltu	$1,$3,$24
1392	 multu	$14,$8		# mul_add_c(a[2],b[0],c3,c1,c2);
1393	addu	$25,$1
1394	addu	$7,$25
1395	sltu	$2,$7,$25
1396	sw	$3,4($4)	# r[1]=c2;
1397
1398	mflo	$24
1399	mfhi	$25
1400	addu	$7,$24
1401	sltu	$1,$7,$24
1402	multu	$13,$9		# mul_add_c(a[1],b[1],c3,c1,c2);
1403	addu	$25,$1
1404	addu	$2,$25
1405	mflo	$24
1406	mfhi	$25
1407	addu	$7,$24
1408	sltu	$1,$7,$24
1409	multu	$12,$10		# mul_add_c(a[0],b[2],c3,c1,c2);
1410	addu	$25,$1
1411	addu	$2,$25
1412	sltu	$3,$2,$25
1413	mflo	$24
1414	mfhi	$25
1415	addu	$7,$24
1416	sltu	$1,$7,$24
1417	 multu	$12,$11		# mul_add_c(a[0],b[3],c1,c2,c3);
1418	addu	$25,$1
1419	addu	$2,$25
1420	sltu	$1,$2,$25
1421	addu	$3,$1
1422	sw	$7,2*4($4)	# r[2]=c3;
1423
1424	mflo	$24
1425	mfhi	$25
1426	addu	$2,$24
1427	sltu	$1,$2,$24
1428	multu	$13,$10		# mul_add_c(a[1],b[2],c1,c2,c3);
1429	addu	$25,$1
1430	addu	$3,$25
1431	sltu	$7,$3,$25
1432	mflo	$24
1433	mfhi	$25
1434	addu	$2,$24
1435	sltu	$1,$2,$24
1436	multu	$14,$9		# mul_add_c(a[2],b[1],c1,c2,c3);
1437	addu	$25,$1
1438	addu	$3,$25
1439	sltu	$1,$3,$25
1440	addu	$7,$1
1441	mflo	$24
1442	mfhi	$25
1443	addu	$2,$24
1444	sltu	$1,$2,$24
1445	multu	$15,$8		# mul_add_c(a[3],b[0],c1,c2,c3);
1446	addu	$25,$1
1447	addu	$3,$25
1448	sltu	$1,$3,$25
1449	addu	$7,$1
1450	mflo	$24
1451	mfhi	$25
1452	addu	$2,$24
1453	sltu	$1,$2,$24
1454	 multu	$15,$9		# mul_add_c(a[3],b[1],c2,c3,c1);
1455	addu	$25,$1
1456	addu	$3,$25
1457	sltu	$1,$3,$25
1458	addu	$7,$1
1459	sw	$2,3*4($4)	# r[3]=c1;
1460
1461	mflo	$24
1462	mfhi	$25
1463	addu	$3,$24
1464	sltu	$1,$3,$24
1465	multu	$14,$10		# mul_add_c(a[2],b[2],c2,c3,c1);
1466	addu	$25,$1
1467	addu	$7,$25
1468	sltu	$2,$7,$25
1469	mflo	$24
1470	mfhi	$25
1471	addu	$3,$24
1472	sltu	$1,$3,$24
1473	multu	$13,$11		# mul_add_c(a[1],b[3],c2,c3,c1);
1474	addu	$25,$1
1475	addu	$7,$25
1476	sltu	$1,$7,$25
1477	addu	$2,$1
1478	mflo	$24
1479	mfhi	$25
1480	addu	$3,$24
1481	sltu	$1,$3,$24
1482	 multu	$14,$11		# mul_add_c(a[2],b[3],c3,c1,c2);
1483	addu	$25,$1
1484	addu	$7,$25
1485	sltu	$1,$7,$25
1486	addu	$2,$1
1487	sw	$3,4*4($4)	# r[4]=c2;
1488
1489	mflo	$24
1490	mfhi	$25
1491	addu	$7,$24
1492	sltu	$1,$7,$24
1493	multu	$15,$10		# mul_add_c(a[3],b[2],c3,c1,c2);
1494	addu	$25,$1
1495	addu	$2,$25
1496	sltu	$3,$2,$25
1497	mflo	$24
1498	mfhi	$25
1499	addu	$7,$24
1500	sltu	$1,$7,$24
1501	 multu	$15,$11		# mul_add_c(a[3],b[3],c1,c2,c3);
1502	addu	$25,$1
1503	addu	$2,$25
1504	sltu	$1,$2,$25
1505	addu	$3,$1
1506	sw	$7,5*4($4)	# r[5]=c3;
1507
1508	mflo	$24
1509	mfhi	$25
1510	addu	$2,$24
1511	sltu	$1,$2,$24
1512	addu	$25,$1
1513	addu	$3,$25
1514	sw	$2,6*4($4)	# r[6]=c1;
1515	sw	$3,7*4($4)	# r[7]=c2;
1516
1517	.set	noreorder
1518	jr	$31
1519	nop
1520.end	bn_mul_comba4
1521
1522.align	5
1523.globl	bn_sqr_comba8
1524.ent	bn_sqr_comba8
1525bn_sqr_comba8:
1526	.set	reorder
1527	lw	$12,0($5)
1528	lw	$13,4($5)
1529	lw	$14,2*4($5)
1530	lw	$15,3*4($5)
1531
1532	multu	$12,$12		# mul_add_c(a[0],b[0],c1,c2,c3);
1533	lw	$8,4*4($5)
1534	lw	$9,5*4($5)
1535	lw	$10,6*4($5)
1536	lw	$11,7*4($5)
1537	mflo	$2
1538	mfhi	$3
1539	sw	$2,0($4)
1540
1541	multu	$12,$13		# mul_add_c2(a[0],b[1],c2,c3,c1);
1542	mflo	$24
1543	mfhi	$25
1544	slt	$2,$25,$0
1545	sll	$25,1
1546	 multu	$14,$12		# mul_add_c2(a[2],b[0],c3,c1,c2);
1547	slt	$6,$24,$0
1548	addu	$25,$6
1549	sll	$24,1
1550	addu	$3,$24
1551	sltu	$1,$3,$24
1552	addu	$7,$25,$1
1553	sw	$3,4($4)
1554
1555	mflo	$24
1556	mfhi	$25
1557	slt	$3,$25,$0
1558	sll	$25,1
1559	multu	$13,$13		# mul_add_c(a[1],b[1],c3,c1,c2);
1560	slt	$6,$24,$0
1561	addu	$25,$6
1562	sll	$24,1
1563	addu	$7,$24
1564	sltu	$1,$7,$24
1565	addu	$25,$1
1566	addu	$2,$25
1567	sltu	$1,$2,$25
1568	addu	$3,$1
1569	mflo	$24
1570	mfhi	$25
1571	addu	$7,$24
1572	sltu	$1,$7,$24
1573	 multu	$12,$15		# mul_add_c2(a[0],b[3],c1,c2,c3);
1574	addu	$25,$1
1575	addu	$2,$25
1576	sltu	$1,$2,$25
1577	addu	$3,$1
1578	sw	$7,2*4($4)
1579
1580	mflo	$24
1581	mfhi	$25
1582	slt	$7,$25,$0
1583	sll	$25,1
1584	multu	$13,$14		# mul_add_c2(a[1],b[2],c1,c2,c3);
1585	slt	$6,$24,$0
1586	addu	$25,$6
1587	sll	$24,1
1588	addu	$2,$24
1589	sltu	$1,$2,$24
1590	addu	$25,$1
1591	addu	$3,$25
1592	sltu	$1,$3,$25
1593	addu	$7,$1
1594	mflo	$24
1595	mfhi	$25
1596	slt	$1,$25,$0
1597	addu	$7,$1
1598	 multu	$8,$12		# mul_add_c2(a[4],b[0],c2,c3,c1);
1599	sll	$25,1
1600	slt	$6,$24,$0
1601	addu	$25,$6
1602	sll	$24,1
1603	addu	$2,$24
1604	sltu	$1,$2,$24
1605	addu	$25,$1
1606	addu	$3,$25
1607	sltu	$1,$3,$25
1608	addu	$7,$1
1609	sw	$2,3*4($4)
1610
1611	mflo	$24
1612	mfhi	$25
1613	slt	$2,$25,$0
1614	sll	$25,1
1615	multu	$15,$13		# mul_add_c2(a[3],b[1],c2,c3,c1);
1616	slt	$6,$24,$0
1617	addu	$25,$6
1618	sll	$24,1
1619	addu	$3,$24
1620	sltu	$1,$3,$24
1621	addu	$25,$1
1622	addu	$7,$25
1623	sltu	$1,$7,$25
1624	addu	$2,$1
1625	mflo	$24
1626	mfhi	$25
1627	slt	$1,$25,$0
1628	addu	$2,$1
1629	multu	$14,$14		# mul_add_c(a[2],b[2],c2,c3,c1);
1630	sll	$25,1
1631	slt	$6,$24,$0
1632	addu	$25,$6
1633	sll	$24,1
1634	addu	$3,$24
1635	sltu	$1,$3,$24
1636	addu	$25,$1
1637	addu	$7,$25
1638	sltu	$1,$7,$25
1639	addu	$2,$1
1640	mflo	$24
1641	mfhi	$25
1642	addu	$3,$24
1643	sltu	$1,$3,$24
1644	 multu	$12,$9		# mul_add_c2(a[0],b[5],c3,c1,c2);
1645	addu	$25,$1
1646	addu	$7,$25
1647	sltu	$1,$7,$25
1648	addu	$2,$1
1649	sw	$3,4*4($4)
1650
1651	mflo	$24
1652	mfhi	$25
1653	slt	$3,$25,$0
1654	sll	$25,1
1655	multu	$13,$8		# mul_add_c2(a[1],b[4],c3,c1,c2);
1656	slt	$6,$24,$0
1657	addu	$25,$6
1658	sll	$24,1
1659	addu	$7,$24
1660	sltu	$1,$7,$24
1661	addu	$25,$1
1662	addu	$2,$25
1663	sltu	$1,$2,$25
1664	addu	$3,$1
1665	mflo	$24
1666	mfhi	$25
1667	slt	$1,$25,$0
1668	addu	$3,$1
1669	multu	$14,$15		# mul_add_c2(a[2],b[3],c3,c1,c2);
1670	sll	$25,1
1671	slt	$6,$24,$0
1672	addu	$25,$6
1673	sll	$24,1
1674	addu	$7,$24
1675	sltu	$1,$7,$24
1676	addu	$25,$1
1677	addu	$2,$25
1678	sltu	$1,$2,$25
1679	addu	$3,$1
1680	mflo	$24
1681	mfhi	$25
1682	slt	$1,$25,$0
1683	 multu	$10,$12		# mul_add_c2(a[6],b[0],c1,c2,c3);
1684	addu	$3,$1
1685	sll	$25,1
1686	slt	$6,$24,$0
1687	addu	$25,$6
1688	sll	$24,1
1689	addu	$7,$24
1690	sltu	$1,$7,$24
1691	addu	$25,$1
1692	addu	$2,$25
1693	sltu	$1,$2,$25
1694	addu	$3,$1
1695	sw	$7,5*4($4)
1696
1697	mflo	$24
1698	mfhi	$25
1699	slt	$7,$25,$0
1700	sll	$25,1
1701	multu	$9,$13		# mul_add_c2(a[5],b[1],c1,c2,c3);
1702	slt	$6,$24,$0
1703	addu	$25,$6
1704	sll	$24,1
1705	addu	$2,$24
1706	sltu	$1,$2,$24
1707	addu	$25,$1
1708	addu	$3,$25
1709	sltu	$1,$3,$25
1710	addu	$7,$1
1711	mflo	$24
1712	mfhi	$25
1713	slt	$1,$25,$0
1714	addu	$7,$1
1715	multu	$8,$14		# mul_add_c2(a[4],b[2],c1,c2,c3);
1716	sll	$25,1
1717	slt	$6,$24,$0
1718	addu	$25,$6
1719	sll	$24,1
1720	addu	$2,$24
1721	sltu	$1,$2,$24
1722	addu	$25,$1
1723	addu	$3,$25
1724	sltu	$1,$3,$25
1725	addu	$7,$1
1726	mflo	$24
1727	mfhi	$25
1728	slt	$1,$25,$0
1729	addu	$7,$1
1730	multu	$15,$15		# mul_add_c(a[3],b[3],c1,c2,c3);
1731	sll	$25,1
1732	slt	$6,$24,$0
1733	addu	$25,$6
1734	sll	$24,1
1735	addu	$2,$24
1736	sltu	$1,$2,$24
1737	addu	$25,$1
1738	addu	$3,$25
1739	sltu	$1,$3,$25
1740	addu	$7,$1
1741	mflo	$24
1742	mfhi	$25
1743	addu	$2,$24
1744	sltu	$1,$2,$24
1745	 multu	$12,$11		# mul_add_c2(a[0],b[7],c2,c3,c1);
1746	addu	$25,$1
1747	addu	$3,$25
1748	sltu	$1,$3,$25
1749	addu	$7,$1
1750	sw	$2,6*4($4)
1751
1752	mflo	$24
1753	mfhi	$25
1754	slt	$2,$25,$0
1755	sll	$25,1
1756	multu	$13,$10		# mul_add_c2(a[1],b[6],c2,c3,c1);
1757	slt	$6,$24,$0
1758	addu	$25,$6
1759	sll	$24,1
1760	addu	$3,$24
1761	sltu	$1,$3,$24
1762	addu	$25,$1
1763	addu	$7,$25
1764	sltu	$1,$7,$25
1765	addu	$2,$1
1766	mflo	$24
1767	mfhi	$25
1768	slt	$1,$25,$0
1769	addu	$2,$1
1770	multu	$14,$9		# mul_add_c2(a[2],b[5],c2,c3,c1);
1771	sll	$25,1
1772	slt	$6,$24,$0
1773	addu	$25,$6
1774	sll	$24,1
1775	addu	$3,$24
1776	sltu	$1,$3,$24
1777	addu	$25,$1
1778	addu	$7,$25
1779	sltu	$1,$7,$25
1780	addu	$2,$1
1781	mflo	$24
1782	mfhi	$25
1783	slt	$1,$25,$0
1784	addu	$2,$1
1785	multu	$15,$8		# mul_add_c2(a[3],b[4],c2,c3,c1);
1786	sll	$25,1
1787	slt	$6,$24,$0
1788	addu	$25,$6
1789	sll	$24,1
1790	addu	$3,$24
1791	sltu	$1,$3,$24
1792	addu	$25,$1
1793	addu	$7,$25
1794	sltu	$1,$7,$25
1795	addu	$2,$1
1796	mflo	$24
1797	mfhi	$25
1798	slt	$1,$25,$0
1799	addu	$2,$1
1800	 multu	$11,$13		# mul_add_c2(a[7],b[1],c3,c1,c2);
1801	sll	$25,1
1802	slt	$6,$24,$0
1803	addu	$25,$6
1804	sll	$24,1
1805	addu	$3,$24
1806	sltu	$1,$3,$24
1807	addu	$25,$1
1808	addu	$7,$25
1809	sltu	$1,$7,$25
1810	addu	$2,$1
1811	sw	$3,7*4($4)
1812
1813	mflo	$24
1814	mfhi	$25
1815	slt	$3,$25,$0
1816	sll	$25,1
1817	multu	$10,$14		# mul_add_c2(a[6],b[2],c3,c1,c2);
1818	slt	$6,$24,$0
1819	addu	$25,$6
1820	sll	$24,1
1821	addu	$7,$24
1822	sltu	$1,$7,$24
1823	addu	$25,$1
1824	addu	$2,$25
1825	sltu	$1,$2,$25
1826	addu	$3,$1
1827	mflo	$24
1828	mfhi	$25
1829	slt	$1,$25,$0
1830	addu	$3,$1
1831	multu	$9,$15		# mul_add_c2(a[5],b[3],c3,c1,c2);
1832	sll	$25,1
1833	slt	$6,$24,$0
1834	addu	$25,$6
1835	sll	$24,1
1836	addu	$7,$24
1837	sltu	$1,$7,$24
1838	addu	$25,$1
1839	addu	$2,$25
1840	sltu	$1,$2,$25
1841	addu	$3,$1
1842	mflo	$24
1843	mfhi	$25
1844	slt	$1,$25,$0
1845	addu	$3,$1
1846	multu	$8,$8		# mul_add_c(a[4],b[4],c3,c1,c2);
1847	sll	$25,1
1848	slt	$6,$24,$0
1849	addu	$25,$6
1850	sll	$24,1
1851	addu	$7,$24
1852	sltu	$1,$7,$24
1853	addu	$25,$1
1854	addu	$2,$25
1855	sltu	$1,$2,$25
1856	addu	$3,$1
1857	mflo	$24
1858	mfhi	$25
1859	addu	$7,$24
1860	sltu	$1,$7,$24
1861	 multu	$14,$11		# mul_add_c2(a[2],b[7],c1,c2,c3);
1862	addu	$25,$1
1863	addu	$2,$25
1864	sltu	$1,$2,$25
1865	addu	$3,$1
1866	sw	$7,8*4($4)
1867
1868	mflo	$24
1869	mfhi	$25
1870	slt	$7,$25,$0
1871	sll	$25,1
1872	multu	$15,$10		# mul_add_c2(a[3],b[6],c1,c2,c3);
1873	slt	$6,$24,$0
1874	addu	$25,$6
1875	sll	$24,1
1876	addu	$2,$24
1877	sltu	$1,$2,$24
1878	addu	$25,$1
1879	addu	$3,$25
1880	sltu	$1,$3,$25
1881	addu	$7,$1
1882	mflo	$24
1883	mfhi	$25
1884	slt	$1,$25,$0
1885	addu	$7,$1
1886	multu	$8,$9		# mul_add_c2(a[4],b[5],c1,c2,c3);
1887	sll	$25,1
1888	slt	$6,$24,$0
1889	addu	$25,$6
1890	sll	$24,1
1891	addu	$2,$24
1892	sltu	$1,$2,$24
1893	addu	$25,$1
1894	addu	$3,$25
1895	sltu	$1,$3,$25
1896	addu	$7,$1
1897	mflo	$24
1898	mfhi	$25
1899	slt	$1,$25,$0
1900	addu	$7,$1
1901	 multu	$11,$15		# mul_add_c2(a[7],b[3],c2,c3,c1);
1902	sll	$25,1
1903	slt	$6,$24,$0
1904	addu	$25,$6
1905	sll	$24,1
1906	addu	$2,$24
1907	sltu	$1,$2,$24
1908	addu	$25,$1
1909	addu	$3,$25
1910	sltu	$1,$3,$25
1911	addu	$7,$1
1912	sw	$2,9*4($4)
1913
1914	mflo	$24
1915	mfhi	$25
1916	slt	$2,$25,$0
1917	sll	$25,1
1918	multu	$10,$8		# mul_add_c2(a[6],b[4],c2,c3,c1);
1919	slt	$6,$24,$0
1920	addu	$25,$6
1921	sll	$24,1
1922	addu	$3,$24
1923	sltu	$1,$3,$24
1924	addu	$25,$1
1925	addu	$7,$25
1926	sltu	$1,$7,$25
1927	addu	$2,$1
1928	mflo	$24
1929	mfhi	$25
1930	slt	$1,$25,$0
1931	addu	$2,$1
1932	multu	$9,$9		# mul_add_c(a[5],b[5],c2,c3,c1);
1933	sll	$25,1
1934	slt	$6,$24,$0
1935	addu	$25,$6
1936	sll	$24,1
1937	addu	$3,$24
1938	sltu	$1,$3,$24
1939	addu	$25,$1
1940	addu	$7,$25
1941	sltu	$1,$7,$25
1942	addu	$2,$1
1943	mflo	$24
1944	mfhi	$25
1945	addu	$3,$24
1946	sltu	$1,$3,$24
1947	 multu	$8,$11		# mul_add_c2(a[4],b[7],c3,c1,c2);
1948	addu	$25,$1
1949	addu	$7,$25
1950	sltu	$1,$7,$25
1951	addu	$2,$1
1952	sw	$3,10*4($4)
1953
1954	mflo	$24
1955	mfhi	$25
1956	slt	$3,$25,$0
1957	sll	$25,1
1958	multu	$9,$10		# mul_add_c2(a[5],b[6],c3,c1,c2);
1959	slt	$6,$24,$0
1960	addu	$25,$6
1961	sll	$24,1
1962	addu	$7,$24
1963	sltu	$1,$7,$24
1964	addu	$25,$1
1965	addu	$2,$25
1966	sltu	$1,$2,$25
1967	addu	$3,$1
1968	mflo	$24
1969	mfhi	$25
1970	slt	$1,$25,$0
1971	addu	$3,$1
1972	 multu	$11,$9		# mul_add_c2(a[7],b[5],c1,c2,c3);
1973	sll	$25,1
1974	slt	$6,$24,$0
1975	addu	$25,$6
1976	sll	$24,1
1977	addu	$7,$24
1978	sltu	$1,$7,$24
1979	addu	$25,$1
1980	addu	$2,$25
1981	sltu	$1,$2,$25
1982	addu	$3,$1
1983	sw	$7,11*4($4)
1984
1985	mflo	$24
1986	mfhi	$25
1987	slt	$7,$25,$0
1988	sll	$25,1
1989	multu	$10,$10		# mul_add_c(a[6],b[6],c1,c2,c3);
1990	slt	$6,$24,$0
1991	addu	$25,$6
1992	sll	$24,1
1993	addu	$2,$24
1994	sltu	$1,$2,$24
1995	addu	$25,$1
1996	addu	$3,$25
1997	sltu	$1,$3,$25
1998	addu	$7,$1
1999	mflo	$24
2000	mfhi	$25
2001	addu	$2,$24
2002	sltu	$1,$2,$24
2003	 multu	$10,$11		# mul_add_c2(a[6],b[7],c2,c3,c1);
2004	addu	$25,$1
2005	addu	$3,$25
2006	sltu	$1,$3,$25
2007	addu	$7,$1
2008	sw	$2,12*4($4)
2009
2010	mflo	$24
2011	mfhi	$25
2012	slt	$2,$25,$0
2013	sll	$25,1
2014	 multu	$11,$11		# mul_add_c(a[7],b[7],c3,c1,c2);
2015	slt	$6,$24,$0
2016	addu	$25,$6
2017	sll	$24,1
2018	addu	$3,$24
2019	sltu	$1,$3,$24
2020	addu	$25,$1
2021	addu	$7,$25
2022	sltu	$1,$7,$25
2023	addu	$2,$1
2024	sw	$3,13*4($4)
2025
2026	mflo	$24
2027	mfhi	$25
2028	addu	$7,$24
2029	sltu	$1,$7,$24
2030	addu	$25,$1
2031	addu	$2,$25
2032	sw	$7,14*4($4)
2033	sw	$2,15*4($4)
2034
2035	.set	noreorder
2036	jr	$31
2037	nop
2038.end	bn_sqr_comba8
2039
#
# bn_sqr_comba4 -- r[0..7] = a[0..3]^2, 32-bit limbs, column-wise
# ("comba") schoolbook squaring.  Corresponds to OpenSSL's
#	void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
#
# In:	$4 = r (eight words are stored), $5 = a (four words are loaded)
# Out:	nothing meaningful in $2
# Uses:	$12-$15	a[0]..a[3]
#	$2,$3,$7 rotating three-word column accumulator
#	$24,$25	low/high word of the product being accumulated
#	$1,$6	carry scratch ($1 is $at; the file runs under .set noat)
#
# A multu carrying a leading space is a "forward" multiplication: it is
# issued in the middle of one accumulation step, and its result is
# picked up by the mflo/mfhi that opens the following step.
#
# Carry handling: a cross product a[i]*a[j] (i != j) has to be counted
# twice.  Forming (hi<<1)|(lo>>31) and then adding the low-word carry
# to it can wrap (0xFFFFFFFF + 1) and lose a carry, which yields a
# wrong square.  Except for the first column, the doubled steps below
# therefore add the undoubled product twice and test for carry after
# every addition; hi + carry itself cannot wrap because the high word
# of a 32x32-bit product never exceeds 0xFFFFFFFE.
#
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$13,4($5)		# a[1]
	multu	$12,$12			# a[0]*a[0]
	lw	$14,2*4($5)		# a[2]
	lw	$15,3*4($5)		# a[3]
	mflo	$2
	mfhi	$3			# $3 = carry word into column 1
	sw	$2,0($4)		# r[0]

	# column 1: 2*a[0]*a[1]
	multu	$12,$13
	mflo	$24
	mfhi	$25
	slt	$2,$25,$0		# bit shifted out of hi -> column 3
	sll	$25,1
	 multu	$14,$12			# forward: a[2]*a[0]
	slt	$6,$24,$0		# bit shifted out of lo
	addu	$25,$6			# hi2 = (hi<<1)|(lo>>31)
	sll	$24,1			# lo2 = lo<<1
	addu	$3,$24
	sltu	$1,$3,$24		# carry out of column 1
	addu	$7,$25,$1		# column 2 = hi2 + carry, may wrap
	sltu	$1,$7,$25		# ...so catch the wrap
	addu	$2,$1			# and push it into column 3
	sw	$3,4($4)		# r[1]

	# column 2: 2*a[2]*a[0] + a[1]*a[1]	($7 low, $2 mid, $3 top)
	mflo	$24
	mfhi	$25
	addu	$7,$24			# low += lo (first time)
	sltu	$1,$7,$24
	 multu	$13,$13			# forward: a[1]*a[1]
	addu	$7,$24			# low += lo (second time)
	addu	$1,$25			# $1 = hi + first carry
	sltu	$24,$7,$24		# second carry
	addu	$2,$1
	addu	$25,$24			# $25 = hi + second carry
	sltu	$3,$2,$1		# top word starts from this carry
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25

	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	$12,$15			# forward: a[0]*a[3]
	addu	$25,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]

	# column 3: 2*a[0]*a[3] + 2*a[1]*a[2]	($2 low, $3 mid, $7 top)
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	$13,$14			# forward: a[1]*a[2]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$7,$3,$1		# top word starts from this carry
	addu	$3,$25
	sltu	$25,$3,$25
	addu	$7,$25

	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	 multu	$15,$13			# forward: a[3]*a[1]
	addu	$2,$24
	addu	$1,$25
	sltu	$24,$2,$24
	addu	$3,$1
	addu	$25,$24
	sltu	$1,$3,$1		# top word already live: accumulate
	addu	$3,$25
	addu	$7,$1
	sltu	$25,$3,$25
	addu	$7,$25
	sw	$2,3*4($4)		# r[3]

	# column 4: 2*a[3]*a[1] + a[2]*a[2]	($3 low, $7 mid, $2 top)
	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	$14,$14			# forward: a[2]*a[2]
	addu	$3,$24
	addu	$1,$25
	sltu	$24,$3,$24
	addu	$7,$1
	addu	$25,$24
	sltu	$2,$7,$1		# top word starts from this carry
	addu	$7,$25
	sltu	$25,$7,$25
	addu	$2,$25

	mflo	$24
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	 multu	$14,$15			# forward: a[2]*a[3]
	addu	$25,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]

	# column 5: 2*a[2]*a[3]			($7 low, $2 mid, $3 top)
	mflo	$24
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	 multu	$15,$15			# forward: a[3]*a[3]
	addu	$7,$24
	addu	$1,$25
	sltu	$24,$7,$24
	addu	$2,$1
	addu	$25,$24
	sltu	$3,$2,$1		# top word starts from this carry
	addu	$2,$25
	sltu	$25,$2,$25
	addu	$3,$25
	sw	$7,5*4($4)		# r[5]

	# column 6: a[3]*a[3]; its high half completes r[7]
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1
	addu	$3,$25
	sw	$2,6*4($4)		# r[6]
	sw	$3,7*4($4)		# r[7]

	.set	noreorder
	jr	$31
	nop
.end	bn_sqr_comba4
2178