• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1.machine	"any"
2.abiversion	2
3.text
4
# bn_mul_mont_int
#
# Limb-by-limb Montgomery multiplication on 64-bit limbs (the algorithm is
# named by the identification string at the end of this file).
#
# Register use as visible in the code below:
#   r3   in: result pointer (moved to r9 at entry); out: always 1
#   r4   operand "a", read limb by limb with byte offset r21
#   r5   operand "b", one limb per outer pass (byte offset r20)
#   r6   operand "n", multiplied by the per-pass factor r24 and
#        subtracted from the scratch vector in .Lsub
#   r7   pointer to one limb; the loaded value derives r24 in every pass
#   r8   number of 8-byte limbs ("num")
#   t    scratch vector of limbs starting at sp+64, walked by r22
# NOTE(review): this matches the (rp, ap, bp, np, n0, num) argument order of
# OpenSSL's bn_mul_mont, with n the modulus - confirm against the caller.
# NOTE(review): both inner loops are counted with num-2 in CTR, so num is
# presumably required to be at least 3 - confirm the caller guarantees it.
5.globl	bn_mul_mont_int
6.type	bn_mul_mont_int,@function
7.align	5
8bn_mul_mont_int:
9.localentry	bn_mul_mont_int,0
10
# Keep the result pointer in r9; r3 becomes scratch from here on.
11	mr	9,3
12	li	3,0
# Allocate the frame. r3 ends up as the (negative) distance from the old sp
# to (old sp - num*8 - 352) rounded down to a 4096-byte boundary.
# r12 keeps the old sp; stdux stores the old sp at the new frame base and
# updates r1. r8 is scaled to bytes for the size computation, then restored.
13	slwi	8,8,3
14	li	12,-4096
15	addi	3,8,352
16	subf	3,3,1
17	and	3,3,12
18	subf	3,1,3
19	mr	12,1
20	srwi	8,8,3
21	stdux	1,1,3
22
# Save r20-r31 in the 96 bytes just below the old stack pointer.
23	std	20,-96(12)
24	std	21,-88(12)
25	std	22,-80(12)
26	std	23,-72(12)
27	std	24,-64(12)
28	std	25,-56(12)
29	std	26,-48(12)
30	std	27,-40(12)
31	std	28,-32(12)
32	std	29,-24(12)
33	std	30,-16(12)
34	std	31,-8(12)
35
# r7 = the limb r7pointed to; r8 = num-2 is the trip count of both inner loops.
36	ld	7,0(7)
37	addi	8,8,-2
38
# ---- First pass: b[0] (r23). r22 = &t[0] = sp+64. ----
# r25:r26 = a[0]*b[0] (low:high).
39	ld	23,0(5)
40	ld	10,0(4)
41	addi	22,1,64
42	mulld	25,10,23
43	mulhdu	26,10,23
44
45	ld	10,8(4)
46	ld	11,0(6)
47
# r24 = low(a[0]*b[0]) * r7: the factor applied to every limb of n in this pass.
48	mulld	24,25,7
49
# r29:r30 = a[1]*b[0].
50	mulld	29,10,23
51	mulhdu	30,10,23
52
# r27:r28 = n[0]*r24. Only the carry out of the low-limb sum below is kept:
# r27 is overwritten inside .L1st before anything is stored.
53	mulld	27,11,24
54	mulhdu	28,11,24
55	ld	11,8(6)
56	addc	27,27,25
57	addze	28,28
58
# r31:r0 = n[1]*r24.
59	mulld	31,11,24
60	mulhdu	0,11,24
61
# CTR = num-2; r21 = byte offset of the next limb to load (starts at limb 2).
62	mtctr	8
63	li	21,16
64.align	4
# Each iteration consumes the two products started one step earlier, stores
# the resulting low limb through r22, and starts the products of the limb
# just loaded. r26 and r28 carry the high halves forward.
65.L1st:
66	ldx	10,4,21
67	addc	25,29,26
68	ldx	11,6,21
69	addze	26,30
70	mulld	29,10,23
71	addc	27,31,28
72	mulhdu	30,10,23
73	addze	28,0
74	mulld	31,11,24
75	addc	27,27,25
76	mulhdu	0,11,24
77	addze	28,28
78	std	27,0(22)
79
80	addi	21,21,8
81	addi	22,22,8
82	bdnz	.L1st
83
# Drain the pipeline: the products of the last limb are still pending.
84	addc	25,29,26
85	addze	26,30
86
87	addc	27,31,28
88	addze	28,0
89	addc	27,27,25
90	addze	28,28
91	std	27,0(22)
92
# Top limb = sum of the two high carries; r3 captures the carry out (0 or 1).
93	li	3,0
94	addc	28,28,26
95	addze	3,3
96	std	28,8(22)
97
# ---- Remaining passes: r20 = byte offset of the current b limb. ----
98	li	20,8
99.align	4
# Same shape as the first pass, except that the previous contents of t are
# added in: t[0] (loaded from 64(r1)) here, t[j] inside .Linner.
100.Louter:
101	ldx	23,5,20
102	ld	10,0(4)
103	addi	22,1,64
104	ld	12,64(1)
105	mulld	25,10,23
106	mulhdu	26,10,23
107	ld	10,8(4)
108	ld	11,0(6)
109	addc	25,25,12
110	mulld	29,10,23
111	addze	26,26
112	mulld	24,25,7
113	mulhdu	30,10,23
114	mulld	27,11,24
115	mulhdu	28,11,24
116	ld	11,8(6)
117	addc	27,27,25
118	mulld	31,11,24
119	addze	28,28
120	mulhdu	0,11,24
121
122	mtctr	8
123	li	21,16
124.align	4
# r12 = the next limb of t (8(r22)); the new low limb is written one slot
# behind it (0(r22)), so t is shifted down by one limb per pass.
125.Linner:
126	ldx	10,4,21
127	addc	25,29,26
128	ld	12,8(22)
129	addze	26,30
130	ldx	11,6,21
131	addc	27,31,28
132	mulld	29,10,23
133	addze	28,0
134	mulhdu	30,10,23
135	addc	25,25,12
136	mulld	31,11,24
137	addze	26,26
138	mulhdu	0,11,24
139	addc	27,27,25
140	addi	21,21,8
141	addze	28,28
142	std	27,0(22)
143	addi	22,22,8
144	bdnz	.Linner
145
# Drain the pipeline for the last limb, including the last limb of t.
146	ld	12,8(22)
147	addc	25,29,26
148	addze	26,30
149	addc	25,25,12
150	addze	26,26
151
152	addc	27,31,28
153	addze	28,0
154	addc	27,27,25
155	addze	28,28
156	std	27,0(22)
157
# addic r3,r3,-1 sets CA exactly when r3 (the previous pass's top carry) is
# non-zero. adde folds that carry into the top limb and the new carry out is
# captured in r3 again.
158	addic	3,3,-1
159	li	3,0
160	adde	28,28,26
161	addze	3,3
162	std	28,8(22)
163
# Loop while the offset just used is <= (num-2)*8 (r8 still holds num-2),
# i.e. until the limb at offset (num-1)*8 of b has been processed.
164	slwi	12,8,3
165	cmpld	20,12
166	addi	20,20,8
167	ble	.Louter
168
# ---- Final subtraction and selection. ----
# r8 = num again. subfc r21,r21,r21 yields r21 = 0 with CA = 1 (no borrow).
169	addi	8,8,2
170	subfc	21,21,21
171	addi	22,1,64
172	mtctr	8
173
174.align	4
# result[j] = t[j] - n[j], borrow propagated through CA.
175.Lsub:	ldx	12,22,21
176	ldx	11,6,21
177	subfe	10,11,12
178	stdx	10,9,21
179	addi	21,21,8
180	bdnz	.Lsub
181
# r3 = r3 - 1 + CA (subfe with r21 = 0). It becomes all-ones when r3 was 0
# and the subtraction borrowed, which selects t in .Lcopy; a value of 0 keeps
# the difference already stored in the result.
182	li	21,0
183	mtctr	8
184	subfe	3,21,3
185
186.align	4
# Branch-free select per limb: result[j] = (t[j] & r3) | (result[j] & ~r3).
# The scratch limb t[j] is overwritten with the loop offset r21 on the way.
187.Lcopy:
188	ldx	12,22,21
189	ldx	10,9,21
190	and	12,12,3
191	andc	10,10,3
192	stdx	21,22,21
193	or	10,10,12
194	stdx	10,9,21
195	addi	21,21,8
196	bdnz	.Lcopy
197
# Epilogue: r12 = old sp read back from the frame base, return value 1,
# restore r20-r31 and the stack pointer.
198	ld	12,0(1)
199	li	3,1
200	ld	20,-96(12)
201	ld	21,-88(12)
202	ld	22,-80(12)
203	ld	23,-72(12)
204	ld	24,-64(12)
205	ld	25,-56(12)
206	ld	26,-48(12)
207	ld	27,-40(12)
208	ld	28,-32(12)
209	ld	29,-24(12)
210	ld	30,-16(12)
211	ld	31,-8(12)
212	mr	1,12
213	blr
# NOTE(review): the words below look like a traceback-table style trailer
# placed after the return; they are data, not instructions - confirm.
214.long	0
215.byte	0,12,4,0,0x80,12,6,0
216.long	0
217.size	bn_mul_mont_int,.-bn_mul_mont_int
# bn_mul4x_mont_int
#
# Montgomery multiplication that processes four 64-bit limbs per step.
# Entry dispatch: when (r8 & 7) == 0 and r4 == r5 (both operands are the same
# buffer) control goes to .Lsqr8x_do instead; otherwise the 4x path below runs.
#
# Register use as visible in the code below:
#   r3   in: result pointer (saved at 8*6(r1)); later the running carry;
#        out: always 1
#   r4   operand "a" (biased by -8), r30 = its end
#   r5   operand "b" (r27 = current limb)
#   r6   operand "n" (biased by -8), limbs in r18-r21
#   r7   pointer to one limb; the loaded value derives the factor r28
#   r8   in: number of limbs; scaled to bytes at entry
#   r9-r12   four limbs of a        r14-r17  product temporaries
#   r22-r26  accumulator            r29      store pointer into the frame
#   r31      byte counter cycling 8,16,24,0     r0   constant zero
# Frame layout used below: 8*6(r1) result pointer, 8*7(r1) end marker for b,
# 8*8..8*11(r1) the four factors of the current round, 8*12(r1).. vector t.
# NOTE(review): this matches the (rp, ap, bp, np, n0, num) argument order of
# OpenSSL's bn_mul_mont, with n the modulus - confirm against the caller.
# NOTE(review): limbs are consumed four at a time with no remainder handling
# visible here, so num is presumably a multiple of 4 - confirm in the caller.
218.globl	bn_mul4x_mont_int
219.type	bn_mul4x_mont_int,@function
220.align	5
221bn_mul4x_mont_int:
222.localentry	bn_mul4x_mont_int,0
223
224	andi.	0,8,7
225	bne	.Lmul4x_do
226	cmpld	4,5
227	bne	.Lmul4x_do
228	b	.Lsqr8x_do
229.Lmul4x_do:
# r8 = num*8 bytes. Frame = 32*8 + num*8 bytes; r9 keeps the old sp and
# stdux stores it at the new frame base.
230	slwi	8,8,3
231	mr	9,1
232	li	10,-32*8
233	sub	10,10,8
234	stdux	1,1,10
235
# Save r14-r31 just below the old stack pointer.
236	std	14,-8*18(9)
237	std	15,-8*17(9)
238	std	16,-8*16(9)
239	std	17,-8*15(9)
240	std	18,-8*14(9)
241	std	19,-8*13(9)
242	std	20,-8*12(9)
243	std	21,-8*11(9)
244	std	22,-8*10(9)
245	std	23,-8*9(9)
246	std	24,-8*8(9)
247	std	25,-8*7(9)
248	std	26,-8*6(9)
249	std	27,-8*5(9)
250	std	28,-8*4(9)
251	std	29,-8*3(9)
252	std	30,-8*2(9)
253	std	31,-8*1(9)
254
# Bias the a, n and result pointers by -8 so that limb k sits at offset
# 8*(k+1); update-form loads/stores (ldu/stdu) then advance them for free.
# r7 = the limb r7 pointed to.
255	subi	4,4,8
256	subi	6,6,8
257	subi	3,3,8
258	ld	7,0(7)
259
# r14 = b + num*8 - 32 (address of the last group of four b limbs);
# r30 = biased a + num*8 (value r4 reaches when all of a has been read).
260	add	14,5,8
261	add	30,4,8
262	subi	14,14,8*4
263
# Load b[0], a[0..3] and n[0..3] (ldu advances r4/r6 by four limbs) and
# clear the accumulator r22-r25.
264	ld	27,8*0(5)
265	li	22,0
266	ld	9,8*1(4)
267	li	23,0
268	ld	10,8*2(4)
269	li	24,0
270	ld	11,8*3(4)
271	li	25,0
272	ldu	12,8*4(4)
273	ld	18,8*1(6)
274	ld	19,8*2(6)
275	ld	20,8*3(6)
276	ldu	21,8*4(6)
277
# Park the biased result pointer and the b end marker in the frame, reset the
# carry word r3, and point r29 one slot below the factor area. This addic only
# forms sp+56; the loops begin with addze, which reads the CA it leaves behind.
278	std	3,8*6(1)
279	std	14,8*7(1)
280	li	3,0
281	addic	29,1,8*7
282	li	31,0
283	li	0,0
284	b	.Loop_mul4x_1st_reduction
285
286.align	5
# Four iterations (r31 cycles 8,16,24,0; CR0 from andi. is consumed by the
# bne at the bottom). Each iteration adds a[0..3]*b[i] to the accumulator,
# derives the factor r28 = low limb * r7, stores r28 for reuse by the tail
# loop, loads the next b limb, and adds n[0..3]*r28 while dropping the
# lowest limb.
287.Loop_mul4x_1st_reduction:
288	mulld	14,9,27
289	addze	3,3
290	mulld	15,10,27
291	addi	31,31,8
292	mulld	16,11,27
293	andi.	31,31,8*4-1
294	mulld	17,12,27
295	addc	22,22,14
296	mulhdu	14,9,27
297	adde	23,23,15
298	mulhdu	15,10,27
299	adde	24,24,16
300	mulld	28,22,7
301	adde	25,25,17
302	mulhdu	16,11,27
303	addze	26,0
304	mulhdu	17,12,27
305	ldx	27,5,31
306	addc	23,23,14
307
308	stdu	28,8(29)
309	adde	24,24,15
310	mulld	15,19,28
311	adde	25,25,16
312	mulld	16,20,28
313	adde	26,26,17
314	mulld	17,21,28
315
316
317
318
319
320
321
322
323
324
# The low product n[0]*r28 is never computed: addic r22,r22,-1 sets CA exactly
# when r22 != 0, and r22 itself is then overwritten.
# NOTE(review): this relies on r7 being the Montgomery constant, so that
# low(n[0]*r28) + r22 is zero with a carry iff r22 != 0 - confirm.
325	addic	22,22,-1
326	mulhdu	14,18,28
327	adde	22,23,15
328	mulhdu	15,19,28
329	adde	23,24,16
330	mulhdu	16,20,28
331	adde	24,25,17
332	mulhdu	17,21,28
333	adde	25,26,3
334	addze	3,0
335	addc	22,22,14
336	adde	23,23,15
337	adde	24,24,16
338	adde	25,25,17
339
340	bne	.Loop_mul4x_1st_reduction
341
# r4 == r30 means a has exactly four limbs: finish on the short path.
342	cmpld	30,4
343	beq	.Lmul4x4_post_condition
344
# Otherwise load the next four limbs of a and n, and the first stored factor.
345	ld	9,8*1(4)
346	ld	10,8*2(4)
347	ld	11,8*3(4)
348	ldu	12,8*4(4)
349	ld	28,8*8(1)
350	ld	18,8*1(6)
351	ld	19,8*2(6)
352	ld	20,8*3(6)
353	ldu	21,8*4(6)
354	b	.Loop_mul4x_1st_tail
355
356.align	5
# Four iterations per group of four a/n limbs: add a[j..j+3]*b[i] and
# n[j..j+3]*factor[i] (factor reloaded from 8*8(r1)+r31), then emit the lowest
# accumulator limb through r29.
357.Loop_mul4x_1st_tail:
358	mulld	14,9,27
359	addze	3,3
360	mulld	15,10,27
361	addi	31,31,8
362	mulld	16,11,27
363	andi.	31,31,8*4-1
364	mulld	17,12,27
365	addc	22,22,14
366	mulhdu	14,9,27
367	adde	23,23,15
368	mulhdu	15,10,27
369	adde	24,24,16
370	mulhdu	16,11,27
371	adde	25,25,17
372	mulhdu	17,12,27
373	addze	26,0
374	ldx	27,5,31
375	addc	23,23,14
376	mulld	14,18,28
377	adde	24,24,15
378	mulld	15,19,28
379	adde	25,25,16
380	mulld	16,20,28
381	adde	26,26,17
382	mulld	17,21,28
383	addc	22,22,14
384	mulhdu	14,18,28
385	adde	23,23,15
386	mulhdu	15,19,28
387	adde	24,24,16
388	mulhdu	16,20,28
389	adde	25,25,17
390	adde	26,26,3
391	mulhdu	17,21,28
392	addze	3,0
393	addi	28,1,8*8
394	ldx	28,28,31
395	stdu	22,8(29)
396	addc	22,23,14
397	adde	23,24,15
398	adde	24,25,16
399	adde	25,26,17
400
401	bne	.Loop_mul4x_1st_tail
402
# r15 = r30 - num*8: the biased start of a. Leave when all of a is consumed.
403	sub	15,30,8
404	cmpld	30,4
405	beq	.Lmul4x_proceed
406
407	ld	9,8*1(4)
408	ld	10,8*2(4)
409	ld	11,8*3(4)
410	ldu	12,8*4(4)
411	ld	18,8*1(6)
412	ld	19,8*2(6)
413	ld	20,8*3(6)
414	ldu	21,8*4(6)
415	b	.Loop_mul4x_1st_tail
416
417.align	5
# End of the first round: advance b by four limbs, rewind a (via r15) and n
# (r6 -= num*8), flush the top of the accumulator plus carry into t, and
# reload the first four limbs of t (8*12..8*15(r1)) as the new accumulator.
418.Lmul4x_proceed:
419	ldu	27,8*4(5)
420	addze	3,3
421	ld	9,8*1(15)
422	ld	10,8*2(15)
423	ld	11,8*3(15)
424	ld	12,8*4(15)
425	addi	4,15,8*4
426	sub	6,6,8
427
428	std	22,8*1(29)
429	std	23,8*2(29)
430	std	24,8*3(29)
431	std	25,8*4(29)
432	std	3,8*5(29)
433	ld	22,8*12(1)
434	ld	23,8*13(1)
435	ld	24,8*14(1)
436	ld	25,8*15(1)
437
438	ld	18,8*1(6)
439	ld	19,8*2(6)
440	ld	20,8*3(6)
441	ldu	21,8*4(6)
442	addic	29,1,8*7
443	li	3,0
444	b	.Loop_mul4x_reduction
445
446.align	5
# Same instruction sequence as .Loop_mul4x_1st_reduction; the only difference
# is that the accumulator starts from the limbs of t loaded above.
447.Loop_mul4x_reduction:
448	mulld	14,9,27
449	addze	3,3
450	mulld	15,10,27
451	addi	31,31,8
452	mulld	16,11,27
453	andi.	31,31,8*4-1
454	mulld	17,12,27
455	addc	22,22,14
456	mulhdu	14,9,27
457	adde	23,23,15
458	mulhdu	15,10,27
459	adde	24,24,16
460	mulld	28,22,7
461	adde	25,25,17
462	mulhdu	16,11,27
463	addze	26,0
464	mulhdu	17,12,27
465	ldx	27,5,31
466	addc	23,23,14
467
468	stdu	28,8(29)
469	adde	24,24,15
470	mulld	15,19,28
471	adde	25,25,16
472	mulld	16,20,28
473	adde	26,26,17
474	mulld	17,21,28
475
# addic sets CA exactly when r22 != 0 (stands in for the low product, as in
# the first reduction loop).
476	addic	22,22,-1
477	mulhdu	14,18,28
478	adde	22,23,15
479	mulhdu	15,19,28
480	adde	23,24,16
481	mulhdu	16,20,28
482	adde	24,25,17
483	mulhdu	17,21,28
484	adde	25,26,3
485	addze	3,0
486	addc	22,22,14
487	adde	23,23,15
488	adde	24,24,16
489	adde	25,25,17
490
491	bne	.Loop_mul4x_reduction
492
# Add the next four limbs of t to the accumulator and load the next four
# limbs of a and n plus the first stored factor.
493	ld	14,8*5(29)
494	addze	3,3
495	ld	15,8*6(29)
496	ld	16,8*7(29)
497	ld	17,8*8(29)
498	ld	9,8*1(4)
499	ld	10,8*2(4)
500	ld	11,8*3(4)
501	ldu	12,8*4(4)
502	addc	22,22,14
503	adde	23,23,15
504	adde	24,24,16
505	adde	25,25,17
506
507
508	ld	28,8*8(1)
509	ld	18,8*1(6)
510	ld	19,8*2(6)
511	ld	20,8*3(6)
512	ldu	21,8*4(6)
513	b	.Loop_mul4x_tail
514
515.align	5
# Same shape as .Loop_mul4x_1st_tail (the factor reload and addze are merely
# scheduled in a different order).
516.Loop_mul4x_tail:
517	mulld	14,9,27
518	addze	3,3
519	mulld	15,10,27
520	addi	31,31,8
521	mulld	16,11,27
522	andi.	31,31,8*4-1
523	mulld	17,12,27
524	addc	22,22,14
525	mulhdu	14,9,27
526	adde	23,23,15
527	mulhdu	15,10,27
528	adde	24,24,16
529	mulhdu	16,11,27
530	adde	25,25,17
531	mulhdu	17,12,27
532	addze	26,0
533	ldx	27,5,31
534	addc	23,23,14
535	mulld	14,18,28
536	adde	24,24,15
537	mulld	15,19,28
538	adde	25,25,16
539	mulld	16,20,28
540	adde	26,26,17
541	mulld	17,21,28
542	addc	22,22,14
543	mulhdu	14,18,28
544	adde	23,23,15
545	mulhdu	15,19,28
546	adde	24,24,16
547	mulhdu	16,20,28
548	adde	25,25,17
549	mulhdu	17,21,28
550	adde	26,26,3
551	addi	28,1,8*8
552	ldx	28,28,31
553	addze	3,0
554	stdu	22,8(29)
555	addc	22,23,14
556	adde	23,24,15
557	adde	24,25,16
558	adde	25,26,17
559
560	bne	.Loop_mul4x_tail
561
# r14 = next limb of t; r15 = r6 - num*8 (biased start of n once n has been
# fully consumed). Leave when all of a has been read.
562	ld	14,8*5(29)
563	sub	15,6,8
564	addze	3,3
565	cmpld	30,4
566	beq	.Loop_mul4x_break
567
568	ld	15,8*6(29)
569	ld	16,8*7(29)
570	ld	17,8*8(29)
571	ld	9,8*1(4)
572	ld	10,8*2(4)
573	ld	11,8*3(4)
574	ldu	12,8*4(4)
575	addc	22,22,14
576	adde	23,23,15
577	adde	24,24,16
578	adde	25,25,17
579
580
581	ld	18,8*1(6)
582	ld	19,8*2(6)
583	ld	20,8*3(6)
584	ldu	21,8*4(6)
585	b	.Loop_mul4x_tail
586
587.align	5
# End of a round: r16 = saved result pointer, r17 = saved b end marker.
# Add the topmost limb of t (r14) and ripple the carries, store the top of
# the accumulator plus carry, rewind a (r4 = r30 - num*8) and n (via r15),
# and reload t[0..3]. If r5 has reached the end marker, go to the final
# subtraction; otherwise start the next group of four b limbs.
588.Loop_mul4x_break:
589	ld	16,8*6(1)
590	ld	17,8*7(1)
591	addc	9,22,14
592	ld	22,8*12(1)
593	addze	10,23
594	ld	23,8*13(1)
595	addze	11,24
596	ld	24,8*14(1)
597	addze	12,25
598	ld	25,8*15(1)
599	addze	3,3
600	std	9,8*1(29)
601	sub	4,30,8
602	std	10,8*2(29)
603	std	11,8*3(29)
604	std	12,8*4(29)
605	std	3,8*5(29)
606
607	ld	18,8*1(15)
608	ld	19,8*2(15)
609	ld	20,8*3(15)
610	ld	21,8*4(15)
611	addi	6,15,8*4
612	cmpld	5,17
613	beq	.Lmul4x_post
614
615	ldu	27,8*4(5)
616	ld	9,8*1(4)
617	ld	10,8*2(4)
618	ld	11,8*3(4)
619	ldu	12,8*4(4)
620	li	3,0
621	addic	29,1,8*7
622	b	.Loop_mul4x_reduction
623
624.align	5
# ---- Final subtraction: result = t - n, four limbs per iteration. ----
# r31 = (num*8 >> 5) - 1 = num/4 - 1 loop trips (the last group is handled
# after the loop). r5 and r30 both hold the biased result pointer.
625.Lmul4x_post:
626
627
628
629
630	srwi	31,8,5
631	mr	5,16
632	subi	31,31,1
633	mr	30,16
634	subfc	14,18,22
635	addi	29,1,8*15
636	subfe	15,19,23
637
638	mtctr	31
639.Lmul4x_sub:
640	ld	18,8*1(6)
641	ld	22,8*1(29)
642	subfe	16,20,24
643	ld	19,8*2(6)
644	ld	23,8*2(29)
645	subfe	17,21,25
646	ld	20,8*3(6)
647	ld	24,8*3(29)
648	ldu	21,8*4(6)
649	ldu	25,8*4(29)
650	std	14,8*1(5)
651	std	15,8*2(5)
652	subfe	14,18,22
653	std	16,8*3(5)
654	stdu	17,8*4(5)
655	subfe	15,19,23
656	bdnz	.Lmul4x_sub
657
# Last group of the subtraction, interleaved with loading the first group for
# the select loop: r9-r12 = stored differences, r14-r17 = t[0..3].
# subfe r3,r0,r3 gives r3 = r3 - 1 + CA: all-ones when the carry word was 0
# and the subtraction borrowed, otherwise 0 for the carry/no-borrow cases.
658	ld	9,8*1(30)
659	std	14,8*1(5)
660	ld	14,8*12(1)
661	subfe	16,20,24
662	ld	10,8*2(30)
663	std	15,8*2(5)
664	ld	15,8*13(1)
665	subfe	17,21,25
666	subfe	3,0,3
667	addi	29,1,8*12
668	ld	11,8*3(30)
669	std	16,8*3(5)
670	ld	16,8*14(1)
671	ld	12,8*4(30)
672	std	17,8*4(5)
673	ld	17,8*15(1)
674
675	mtctr	31
# Branch-free select, four limbs per iteration:
# result = (t & r3) | (difference & ~r3). Each consumed limb of t is
# overwritten with zero.
676.Lmul4x_cond_copy:
677	and	14,14,3
678	andc	9,9,3
679	std	0,8*0(29)
680	and	15,15,3
681	andc	10,10,3
682	std	0,8*1(29)
683	and	16,16,3
684	andc	11,11,3
685	std	0,8*2(29)
686	and	17,17,3
687	andc	12,12,3
688	std	0,8*3(29)
689	or	22,14,9
690	ld	9,8*5(30)
691	ld	14,8*4(29)
692	or	23,15,10
693	ld	10,8*6(30)
694	ld	15,8*5(29)
695	or	24,16,11
696	ld	11,8*7(30)
697	ld	16,8*6(29)
698	or	25,17,12
699	ld	12,8*8(30)
700	ld	17,8*7(29)
701	addi	29,29,8*4
702	std	22,8*1(30)
703	std	23,8*2(30)
704	std	24,8*3(30)
705	stdu	25,8*4(30)
706	bdnz	.Lmul4x_cond_copy
707
# Last group of the select; r5 = old sp (read back from the frame base) for
# the epilogue. One extra slot past the last group is zeroed as well.
708	ld	5,0(1)
709	and	14,14,3
710	andc	9,9,3
711	std	0,8*0(29)
712	and	15,15,3
713	andc	10,10,3
714	std	0,8*1(29)
715	and	16,16,3
716	andc	11,11,3
717	std	0,8*2(29)
718	and	17,17,3
719	andc	12,12,3
720	std	0,8*3(29)
721	or	22,14,9
722	or	23,15,10
723	std	0,8*4(29)
724	or	24,16,11
725	or	25,17,12
726	std	22,8*1(30)
727	std	23,8*2(30)
728	std	24,8*3(30)
729	std	25,8*4(30)
730
731	b	.Lmul4x_done
732
733.align	4
# Short path for exactly four limbs: everything is still in registers.
# r4 = saved (biased) result pointer, r5 = old sp. Subtract n[0..3] from the
# accumulator, turn the final borrow into a mask (r3 = r3 - 1 + CA), and add
# n & mask back so that the subtraction is undone when it borrowed.
734.Lmul4x4_post_condition:
735	ld	4,8*6(1)
736	ld	5,0(1)
737	addze	3,3
738
739	subfc	9,18,22
740	subfe	10,19,23
741	subfe	11,20,24
742	subfe	12,21,25
743	subfe	3,0,3
744
745	and	18,18,3
746	and	19,19,3
747	addc	9,9,18
748	and	20,20,3
749	adde	10,10,19
750	and	21,21,3
751	adde	11,11,20
752	adde	12,12,21
753
754	std	9,8*1(4)
755	std	10,8*2(4)
756	std	11,8*3(4)
757	std	12,8*4(4)
758
# Common exit: zero the four stored factors (8*8..8*11(r1)), return 1,
# restore r14-r31 relative to the old sp in r5, and pop the frame.
759.Lmul4x_done:
760	std	0,8*8(1)
761	std	0,8*9(1)
762	std	0,8*10(1)
763	std	0,8*11(1)
764	li	3,1
765	ld	14,-8*18(5)
766	ld	15,-8*17(5)
767	ld	16,-8*16(5)
768	ld	17,-8*15(5)
769	ld	18,-8*14(5)
770	ld	19,-8*13(5)
771	ld	20,-8*12(5)
772	ld	21,-8*11(5)
773	ld	22,-8*10(5)
774	ld	23,-8*9(5)
775	ld	24,-8*8(5)
776	ld	25,-8*7(5)
777	ld	26,-8*6(5)
778	ld	27,-8*5(5)
779	ld	28,-8*4(5)
780	ld	29,-8*3(5)
781	ld	30,-8*2(5)
782	ld	31,-8*1(5)
783	mr	1,5
784	blr
# NOTE(review): the words below look like a traceback-table style trailer
# placed after the return; they are data, not instructions - confirm.
785.long	0
786.byte	0,12,4,0x20,0x80,18,6,0
787.long	0
788.size	bn_mul4x_mont_int,.-bn_mul4x_mont_int
# __bn_sqr8x_mont (entered through .Lsqr8x_do)
#
# Montgomery squaring, eight 64-bit limbs per step. The only branch to
# .Lsqr8x_do visible in this file is taken when (r8 & 7) == 0 and r4 == r5.
#
# Register use as visible in the code below:
#   r3   in: result pointer (saved at 8*6(r1)); reused as a data pointer;
#        out: always 1
#   r4   operand "a" (biased by -8); later walks n
#   r5   not read as an input here; used as the pointer into vector t
#   r6   in: operand "n" (saved, biased, at 8*7(r1)); then end-of-a / end-of-n
#   r7   in: pointer to one limb (value saved at 8*8(r1)); reused as the
#        current multiplier limb
#   r8   in: number of limbs; scaled to bytes (num*8) at entry
#   r9-r12,r14-r17  eight operand limbs     r18-r21  product temporaries
#   r22-r29         eight-limb accumulator  r30 counter   r31 carry/factor
#   r0              constant zero
# Frame layout used below: 8*6(r1) result pointer, 8*7(r1) n pointer,
# 8*8(r1) value loaded from r7, 8*9(r1) end-of-t marker, 8*10(r1) top-most
# carry, 8*12(r1).. vector t (2*num limbs).
# NOTE(review): r7 is presumably the Montgomery constant n0 and n the modulus,
# as in OpenSSL's bn_mul_mont - confirm against the caller.
789.align	5
790__bn_sqr8x_mont:
791.Lsqr8x_do:
# Frame = 32*8 + num*16 bytes; r9 keeps the old sp, stdux stores it at the
# new frame base. r8 becomes num*8.
792	mr	9,1
793	slwi	10,8,4
794	li	11,-32*8
795	sub	10,11,10
796	slwi	8,8,3
797	stdux	1,1,10
798
# Save r14-r31 just below the old stack pointer.
799	std	14,-8*18(9)
800	std	15,-8*17(9)
801	std	16,-8*16(9)
802	std	17,-8*15(9)
803	std	18,-8*14(9)
804	std	19,-8*13(9)
805	std	20,-8*12(9)
806	std	21,-8*11(9)
807	std	22,-8*10(9)
808	std	23,-8*9(9)
809	std	24,-8*8(9)
810	std	25,-8*7(9)
811	std	26,-8*6(9)
812	std	27,-8*5(9)
813	std	28,-8*4(9)
814	std	29,-8*3(9)
815	std	30,-8*2(9)
816	std	31,-8*1(9)
817
# Bias a, n (copy in r18) and the result pointer by -8; r7 = the limb r7
# pointed to; r0 = 0.
818	subi	4,4,8
819	subi	18,6,8
820	subi	3,3,8
821	ld	7,0(7)
822	li	0,0
823
# r6 = biased a + num*8 (end of a). Load a[0..7] (ldu advances r4 by eight
# limbs) and clear accumulator limbs r23-r29. r22 is not cleared here.
824	add	6,4,8
825	ld	9,8*1(4)
826
827	ld	10,8*2(4)
828	li	23,0
829	ld	11,8*3(4)
830	li	24,0
831	ld	12,8*4(4)
832	li	25,0
833	ld	14,8*5(4)
834	li	26,0
835	ld	15,8*6(4)
836	li	27,0
837	ld	16,8*7(4)
838	li	28,0
839	ldu	17,8*8(4)
840	li	29,0
841
# r5 = sp + 8*11, one slot below t. r30 counts down in 64-byte steps; CR0
# from subic. decides whether the zeroing loop repeats.
842	addi	5,1,8*11
843	subic.	30,8,8*8
844	b	.Lsqr8x_zero_start
845
846.align	5
# Zero t, sixteen limbs per full iteration. The first entry jumps to the
# midpoint, so the lowest eight slots are skipped on that pass (the first
# outer pass stores to them directly).
847.Lsqr8x_zero:
848	subic.	30,30,8*8
849	std	0,8*1(5)
850	std	0,8*2(5)
851	std	0,8*3(5)
852	std	0,8*4(5)
853	std	0,8*5(5)
854	std	0,8*6(5)
855	std	0,8*7(5)
856	std	0,8*8(5)
857.Lsqr8x_zero_start:
858	std	0,8*9(5)
859	std	0,8*10(5)
860	std	0,8*11(5)
861	std	0,8*12(5)
862	std	0,8*13(5)
863	std	0,8*14(5)
864	std	0,8*15(5)
865	stdu	0,8*16(5)
866	bne	.Lsqr8x_zero
867
# Park the result pointer, the n pointer, the r7 value, the end-of-t marker
# (current r5) and a zero top-most carry in the frame; rewind r5 to t.
868	std	3,8*6(1)
869	std	18,8*7(1)
870	std	7,8*8(1)
871	std	5,8*9(1)
872	std	0,8*10(1)
873	addi	5,1,8*11
874
875
876.align	5
# Accumulate the cross products a[i]*a[j], i < j, of the eight limbs held in
# r9-r12,r14-r17 (no limb is multiplied by itself here), flushing finished
# limbs of the accumulator to t as they complete.
877.Lsqr8x_outer_loop:
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
# Products of the first limb (r9) with the other seven.
907	mulld	18,10,9
908	mulld	19,11,9
909	mulld	20,12,9
910	mulld	21,14,9
911	addc	23,23,18
912	mulld	18,15,9
913	adde	24,24,19
914	mulld	19,16,9
915	adde	25,25,20
916	mulld	20,17,9
917	adde	26,26,21
918	mulhdu	21,10,9
919	adde	27,27,18
920	mulhdu	18,11,9
921	adde	28,28,19
922	mulhdu	19,12,9
923	adde	29,29,20
924	mulhdu	20,14,9
# NOTE(review): on the very first pass r22 has not been written by this
# function when it is stored here. The slot (8*12(r1)) is rewritten by the
# first iteration of .Lsqr4x_shift_n_add - confirm nothing reads it earlier.
925	std	22,8*1(5)
926	addze	22,0
927	std	23,8*2(5)
928	addc	24,24,21
929	mulhdu	21,15,9
930	adde	25,25,18
931	mulhdu	18,16,9
932	adde	26,26,19
933	mulhdu	19,17,9
934	adde	27,27,20
935	mulld	20,11,10
936	adde	28,28,21
937	mulld	21,12,10
938	adde	29,29,18
939	mulld	18,14,10
940	adde	22,22,19
941
# Products of the second limb (r10) with the six limbs above it.
942	mulld	19,15,10
943	addc	25,25,20
944	mulld	20,16,10
945	adde	26,26,21
946	mulld	21,17,10
947	adde	27,27,18
948	mulhdu	18,11,10
949	adde	28,28,19
950	mulhdu	19,12,10
951	adde	29,29,20
952	mulhdu	20,14,10
953	adde	22,22,21
954	mulhdu	21,15,10
955	std	24,8*3(5)
956	addze	23,0
957	std	25,8*4(5)
958	addc	26,26,18
959	mulhdu	18,16,10
960	adde	27,27,19
961	mulhdu	19,17,10
962	adde	28,28,20
963	mulld	20,12,11
964	adde	29,29,21
965	mulld	21,14,11
966	adde	22,22,18
967	mulld	18,15,11
968	adde	23,23,19
969
# Products of the third limb (r11) with the five limbs above it.
970	mulld	19,16,11
971	addc	27,27,20
972	mulld	20,17,11
973	adde	28,28,21
974	mulhdu	21,12,11
975	adde	29,29,18
976	mulhdu	18,14,11
977	adde	22,22,19
978	mulhdu	19,15,11
979	adde	23,23,20
980	mulhdu	20,16,11
981	std	26,8*5(5)
982	addze	24,0
983	std	27,8*6(5)
984	addc	28,28,21
985	mulhdu	21,17,11
986	adde	29,29,18
987	mulld	18,14,12
988	adde	22,22,19
989	mulld	19,15,12
990	adde	23,23,20
991	mulld	20,16,12
992	adde	24,24,21
993
# Products of the fourth limb (r12) with the four limbs above it; r5 then
# advances by eight limbs.
994	mulld	21,17,12
995	addc	29,29,18
996	mulhdu	18,14,12
997	adde	22,22,19
998	mulhdu	19,15,12
999	adde	23,23,20
1000	mulhdu	20,16,12
1001	adde	24,24,21
1002	mulhdu	21,17,12
1003	std	28,8*7(5)
1004	addze	25,0
1005	stdu	29,8*8(5)
1006	addc	22,22,18
1007	mulld	18,15,14
1008	adde	23,23,19
1009	mulld	19,16,14
1010	adde	24,24,20
1011	mulld	20,17,14
1012	adde	25,25,21
1013
# Products among the upper limbs r14-r17.
1014	mulhdu	21,15,14
1015	addc	23,23,18
1016	mulhdu	18,16,14
1017	adde	24,24,19
1018	mulhdu	19,17,14
1019	adde	25,25,20
1020	mulld	20,16,15
1021	addze	26,0
1022	addc	24,24,21
1023	mulld	21,17,15
1024	adde	25,25,18
1025	mulhdu	18,16,15
1026	adde	26,26,19
1027
# cmpld r6,r4 (below) sets CR0 for the beq further down: equal means this was
# the last eight-limb window of a.
1028	mulhdu	19,17,15
1029	addc	25,25,20
1030	mulld	20,17,16
1031	adde	26,26,21
1032	mulhdu	21,17,16
1033	addze	27,0
1034	addc	26,26,18
1035	cmpld	6,4
1036	adde	27,27,19
1037
# r18 = r6 - num*8: the biased start of a (used by .Lsqr8x_outer_break).
1038	addc	27,27,20
1039	sub	18,6,8
1040	addze	28,0
1041	add	28,28,21
1042
1043	beq	.Lsqr8x_outer_break
1044
# More limbs of a follow: r7 = first limb of the current window (multiplier),
# add the next eight limbs of t to the accumulator, and load the next eight
# limbs of a. r3 = r4 - 56 (taken before r4 advances) is the base from which
# the following multiplier limbs are fetched with offset r30.
1045	mr	7,9
1046	ld	9,8*1(5)
1047	ld	10,8*2(5)
1048	ld	11,8*3(5)
1049	ld	12,8*4(5)
1050	ld	14,8*5(5)
1051	ld	15,8*6(5)
1052	ld	16,8*7(5)
1053	ld	17,8*8(5)
1054	addc	22,22,9
1055	ld	9,8*1(4)
1056	adde	23,23,10
1057	ld	10,8*2(4)
1058	adde	24,24,11
1059	ld	11,8*3(4)
1060	adde	25,25,12
1061	ld	12,8*4(4)
1062	adde	26,26,14
1063	ld	14,8*5(4)
1064	adde	27,27,15
1065	ld	15,8*6(4)
1066	adde	28,28,16
1067	ld	16,8*7(4)
1068	subi	3,4,8*7
1069	addze	29,17
1070	ldu	17,8*8(4)
1071
1072	li	30,0
1073	b	.Lsqr8x_mul
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097.align	5
# Eight iterations (r30 cycles 8..56,0; CR0 from andi. is consumed by the bne
# at the bottom). Each iteration adds (eight loaded limbs) * r7 to the
# accumulator, emits the lowest limb to t through r5, shifts the accumulator
# down by one limb, and fetches the next multiplier limb from r3 + r30.
1098.Lsqr8x_mul:
1099	mulld	18,9,7
1100	addze	31,0
1101	mulld	19,10,7
1102	addi	30,30,8
1103	mulld	20,11,7
1104	andi.	30,30,8*8-1
1105	mulld	21,12,7
1106	addc	22,22,18
1107	mulld	18,14,7
1108	adde	23,23,19
1109	mulld	19,15,7
1110	adde	24,24,20
1111	mulld	20,16,7
1112	adde	25,25,21
1113	mulld	21,17,7
1114	adde	26,26,18
1115	mulhdu	18,9,7
1116	adde	27,27,19
1117	mulhdu	19,10,7
1118	adde	28,28,20
1119	mulhdu	20,11,7
1120	adde	29,29,21
1121	mulhdu	21,12,7
1122	addze	31,31
1123	stdu	22,8(5)
1124	addc	22,23,18
1125	mulhdu	18,14,7
1126	adde	23,24,19
1127	mulhdu	19,15,7
1128	adde	24,25,20
1129	mulhdu	20,16,7
1130	adde	25,26,21
1131	mulhdu	21,17,7
1132	ldx	7,3,30
1133	adde	26,27,18
1134	adde	27,28,19
1135	adde	28,29,20
1136	adde	29,31,21
1137
1138	bne	.Lsqr8x_mul
1139
1140
# r4 == r6: the end of a has been reached for this multiplier window.
1141	cmpld	4,6
1142	beq	.Lsqr8x_break
1143
# Otherwise add the next eight limbs of t and load the next eight limbs of a.
1144	ld	9,8*1(5)
1145	ld	10,8*2(5)
1146	ld	11,8*3(5)
1147	ld	12,8*4(5)
1148	ld	14,8*5(5)
1149	ld	15,8*6(5)
1150	ld	16,8*7(5)
1151	ld	17,8*8(5)
1152	addc	22,22,9
1153	ld	9,8*1(4)
1154	adde	23,23,10
1155	ld	10,8*2(4)
1156	adde	24,24,11
1157	ld	11,8*3(4)
1158	adde	25,25,12
1159	ld	12,8*4(4)
1160	adde	26,26,14
1161	ld	14,8*5(4)
1162	adde	27,27,15
1163	ld	15,8*6(4)
1164	adde	28,28,16
1165	ld	16,8*7(4)
1166	adde	29,29,17
1167	ldu	17,8*8(4)
1168
1169	b	.Lsqr8x_mul
1170
1171.align	5
# Move to the next eight-limb window of a: load it from 8*8..8*15(r3) and set
# r4 = r3 + 8*15. r18 = r6 - r4 is the number of bytes of a beyond the new
# window (sub. also sets CR0); r19 = r5 - r18 is the matching position in t.
# If nothing lies beyond (r18 == 0), re-enter the outer loop with the
# accumulator still in registers; otherwise flush it to t, load the eight
# limbs of t at r19, and continue from there.
1172.Lsqr8x_break:
1173	ld	9,8*8(3)
1174	addi	4,3,8*15
1175	ld	10,8*9(3)
1176	sub.	18,6,4
1177	ld	11,8*10(3)
1178	sub	19,5,18
1179	ld	12,8*11(3)
1180	ld	14,8*12(3)
1181	ld	15,8*13(3)
1182	ld	16,8*14(3)
1183	ld	17,8*15(3)
1184	beq	.Lsqr8x_outer_loop
1185
1186	std	22,8*1(5)
1187	ld	22,8*1(19)
1188	std	23,8*2(5)
1189	ld	23,8*2(19)
1190	std	24,8*3(5)
1191	ld	24,8*3(19)
1192	std	25,8*4(5)
1193	ld	25,8*4(19)
1194	std	26,8*5(5)
1195	ld	26,8*5(19)
1196	std	27,8*6(5)
1197	ld	27,8*6(19)
1198	std	28,8*7(5)
1199	ld	28,8*7(19)
1200	std	29,8*8(5)
1201	ld	29,8*8(19)
1202	mr	5,19
1203	b	.Lsqr8x_outer_loop
1204
1205.align	5
# ---- All cross products are in t. Now t = 2*t + squares of the limbs. ----
# r10, r12, r15, r17 = first four limbs of a (r18 = biased start of a);
# r19, r20, r21, r18 = the limbs of t at 8*13..8*16(r1).
1206.Lsqr8x_outer_break:
1207
1208
1209	ld	10,8*1(18)
1210	ld	12,8*2(18)
1211	ld	15,8*3(18)
1212	ld	17,8*4(18)
1213	addi	4,18,8*4
1214
1215	ld	19,8*13(1)
1216	ld	20,8*14(1)
1217	ld	21,8*15(1)
1218	ld	18,8*16(1)
1219
# Flush the remaining accumulator limbs; r30 = (num*8 >> 5) - 1 = num/4 - 1
# loop trips.
1220	std	22,8*1(5)
1221	srwi	30,8,5
1222	std	23,8*2(5)
1223	subi	30,30,1
1224	std	24,8*3(5)
1225	std	25,8*4(5)
1226	std	26,8*5(5)
1227	std	27,8*6(5)
1228	std	28,8*7(5)
1229
# Doubling is done as a one-bit left shift across limbs: "add x,t,t" doubles,
# "srdi t,t,63" extracts the bit shifted out, "or" merges it into the next
# limb. Squares come from mulld/mulhdu of a limb with itself.
1230	addi	5,1,8*11
1231	mulld	22,10,10
1232	mulhdu	10,10,10
1233	add	23,19,19
1234	srdi	19,19,64-1
1235	mulld	11,12,12
1236	mulhdu	12,12,12
1237	addc	23,23,10
1238	add	24,20,20
1239	srdi	20,20,64-1
1240	add	25,21,21
1241	srdi	21,21,64-1
1242	or	24,24,19
1243
1244	mtctr	30
# Each iteration consumes four limbs of a and writes eight limbs of t; the
# carry chain (CA) runs unbroken through the adde instructions.
1245.Lsqr4x_shift_n_add:
1246	mulld	14,15,15
1247	mulhdu	15,15,15
1248	ld	19,8*6(5)
1249	ld	10,8*1(4)
1250	adde	24,24,11
1251	add	26,18,18
1252	srdi	18,18,64-1
1253	or	25,25,20
1254	ld	20,8*7(5)
1255	adde	25,25,12
1256	ld	12,8*2(4)
1257	add	27,19,19
1258	srdi	19,19,64-1
1259	or	26,26,21
1260	ld	21,8*8(5)
1261	mulld	16,17,17
1262	mulhdu	17,17,17
1263	adde	26,26,14
1264	add	28,20,20
1265	srdi	20,20,64-1
1266	or	27,27,18
1267	ld	18,8*9(5)
1268	adde	27,27,15
1269	ld	15,8*3(4)
1270	add	29,21,21
1271	srdi	21,21,64-1
1272	or	28,28,19
1273	ld	19,8*10(5)
1274	mulld	9,10,10
1275	mulhdu	10,10,10
1276	adde	28,28,16
1277	std	22,8*1(5)
1278	add	22,18,18
1279	srdi	18,18,64-1
1280	or	29,29,20
1281	ld	20,8*11(5)
1282	adde	29,29,17
1283	ldu	17,8*4(4)
1284	std	23,8*2(5)
1285	add	23,19,19
1286	srdi	19,19,64-1
1287	or	22,22,21
1288	ld	21,8*12(5)
1289	mulld	11,12,12
1290	mulhdu	12,12,12
1291	adde	22,22,9
1292	std	24,8*3(5)
1293	add	24,20,20
1294	srdi	20,20,64-1
1295	or	23,23,18
1296	ld	18,8*13(5)
1297	adde	23,23,10
1298	std	25,8*4(5)
1299	std	26,8*5(5)
1300	std	27,8*6(5)
1301	std	28,8*7(5)
1302	stdu	29,8*8(5)
1303	add	25,21,21
1304	srdi	21,21,64-1
1305	or	24,24,19
1306	bdnz	.Lsqr4x_shift_n_add
# r4 = saved (biased) n pointer, r7 = saved value originally loaded from r7.
1307	ld	4,8*7(1)
1308	ld	7,8*8(1)
1309
# Finish the last eight limbs of the shift-and-add while already loading
# n[0..7] and the first limbs of t for the reduction.
1310	mulld	14,15,15
1311	mulhdu	15,15,15
1312	std	22,8*1(5)
1313	ld	22,8*12(1)
1314	ld	19,8*6(5)
1315	adde	24,24,11
1316	add	26,18,18
1317	srdi	18,18,64-1
1318	or	25,25,20
1319	ld	20,8*7(5)
1320	adde	25,25,12
1321	add	27,19,19
1322	srdi	19,19,64-1
1323	or	26,26,21
1324	mulld	16,17,17
1325	mulhdu	17,17,17
1326	adde	26,26,14
1327	add	28,20,20
1328	srdi	20,20,64-1
1329	or	27,27,18
1330	std	23,8*2(5)
1331	ld	23,8*13(1)
1332	adde	27,27,15
1333	or	28,28,19
1334	ld	9,8*1(4)
1335	ld	10,8*2(4)
1336	adde	28,28,16
1337	ld	11,8*3(4)
1338	ld	12,8*4(4)
1339	adde	29,17,20
1340	ld	14,8*5(4)
1341	ld	15,8*6(4)
1342
1343
1344
# ---- Reduction. ----
# r31 = r7 * t[0] is the first factor; r6 = biased n + num*8 (end of n);
# r22-r29 = the first eight limbs of t (8*12..8*19(r1)); CTR = 8.
1345	mulld	31,7,22
1346	li	30,8
1347	ld	16,8*7(4)
1348	add	6,4,8
1349	ldu	17,8*8(4)
1350	std	24,8*3(5)
1351	ld	24,8*14(1)
1352	std	25,8*4(5)
1353	ld	25,8*15(1)
1354	std	26,8*5(5)
1355	ld	26,8*16(1)
1356	std	27,8*6(5)
1357	ld	27,8*17(1)
1358	std	28,8*7(5)
1359	ld	28,8*18(1)
1360	std	29,8*8(5)
1361	ld	29,8*19(1)
1362	addi	5,1,8*11
1363	mtctr	30
1364	b	.Lsqr8x_reduction
1365
1366.align	5
# Eight iterations: store the factor r31 over the t slot it retires (it is
# re-read by .Lsqr8x_tail), add n[0..7]*r31 to the accumulator while dropping
# the lowest limb, and compute the next factor r31 = r7 * new lowest limb.
1367.Lsqr8x_reduction:
1368
1369	mulld	19,10,31
1370	mulld	20,11,31
1371	stdu	31,8(5)
1372	mulld	21,12,31
1373
# The low product n[0]*r31 is never computed: addic r22,r22,-1 sets CA exactly
# when r22 != 0, and r22 itself is then overwritten.
# NOTE(review): relies on r7 being the Montgomery constant, so that
# low(n[0]*r31) + r22 is zero with a carry iff r22 != 0 - confirm.
1374	addic	22,22,-1
1375	mulld	18,14,31
1376	adde	22,23,19
1377	mulld	19,15,31
1378	adde	23,24,20
1379	mulld	20,16,31
1380	adde	24,25,21
1381	mulld	21,17,31
1382	adde	25,26,18
1383	mulhdu	18,9,31
1384	adde	26,27,19
1385	mulhdu	19,10,31
1386	adde	27,28,20
1387	mulhdu	20,11,31
1388	adde	28,29,21
1389	mulhdu	21,12,31
1390	addze	29,0
1391	addc	22,22,18
1392	mulhdu	18,14,31
1393	adde	23,23,19
1394	mulhdu	19,15,31
1395	adde	24,24,20
1396	mulhdu	20,16,31
1397	adde	25,25,21
1398	mulhdu	21,17,31
1399	mulld	31,7,22
1400	adde	26,26,18
1401	adde	27,27,19
1402	adde	28,28,20
1403	adde	29,29,21
1404	bdnz	.Lsqr8x_reduction
1405
# Add the next eight limbs of t. r3 = r5 - 56 points at the first of the
# eight stored factors. cmpld r6,r4: equal means n has only eight limbs.
1406	ld	18,8*1(5)
1407	ld	19,8*2(5)
1408	ld	20,8*3(5)
1409	ld	21,8*4(5)
1410	subi	3,5,8*7
1411	cmpld	6,4
1412	addc	22,22,18
1413	ld	18,8*5(5)
1414	adde	23,23,19
1415	ld	19,8*6(5)
1416	adde	24,24,20
1417	ld	20,8*7(5)
1418	adde	25,25,21
1419	ld	21,8*8(5)
1420	adde	26,26,18
1421	adde	27,27,19
1422	adde	28,28,20
1423	adde	29,29,21
1424
1425	beq	.Lsqr8x8_post_condition
1426
# r7 = first stored factor; load the next eight limbs of n.
1427	ld	7,8*0(3)
1428	ld	9,8*1(4)
1429	ld	10,8*2(4)
1430	ld	11,8*3(4)
1431	ld	12,8*4(4)
1432	ld	14,8*5(4)
1433	ld	15,8*6(4)
1434	ld	16,8*7(4)
1435	ldu	17,8*8(4)
1436	li	30,0
1437
1438.align	5
# Same instruction sequence as .Lsqr8x_mul, applied to eight limbs of n and
# the stored factors (fetched from r3 + r30).
1439.Lsqr8x_tail:
1440	mulld	18,9,7
1441	addze	31,0
1442	mulld	19,10,7
1443	addi	30,30,8
1444	mulld	20,11,7
1445	andi.	30,30,8*8-1
1446	mulld	21,12,7
1447	addc	22,22,18
1448	mulld	18,14,7
1449	adde	23,23,19
1450	mulld	19,15,7
1451	adde	24,24,20
1452	mulld	20,16,7
1453	adde	25,25,21
1454	mulld	21,17,7
1455	adde	26,26,18
1456	mulhdu	18,9,7
1457	adde	27,27,19
1458	mulhdu	19,10,7
1459	adde	28,28,20
1460	mulhdu	20,11,7
1461	adde	29,29,21
1462	mulhdu	21,12,7
1463	addze	31,31
1464	stdu	22,8(5)
1465	addc	22,23,18
1466	mulhdu	18,14,7
1467	adde	23,24,19
1468	mulhdu	19,15,7
1469	adde	24,25,20
1470	mulhdu	20,16,7
1471	adde	25,26,21
1472	mulhdu	21,17,7
1473	ldx	7,3,30
1474	adde	26,27,18
1475	adde	27,28,19
1476	adde	28,29,20
1477	adde	29,31,21
1478
1479	bne	.Lsqr8x_tail
1480
1481
# Load the next eight limbs of t into r9-r17, the saved top-most carry into
# r31, and r20 = r6 - num*8 (biased start of n). r6 == r4 means the end of n
# was reached for this round.
1482	ld	9,8*1(5)
1483	ld	31,8*10(1)
1484	cmpld	6,4
1485	ld	10,8*2(5)
1486	sub	20,6,8
1487	ld	11,8*3(5)
1488	ld	12,8*4(5)
1489	ld	14,8*5(5)
1490	ld	15,8*6(5)
1491	ld	16,8*7(5)
1492	ld	17,8*8(5)
1493	beq	.Lsqr8x_tail_break
1494
1495	addc	22,22,9
1496	ld	9,8*1(4)
1497	adde	23,23,10
1498	ld	10,8*2(4)
1499	adde	24,24,11
1500	ld	11,8*3(4)
1501	adde	25,25,12
1502	ld	12,8*4(4)
1503	adde	26,26,14
1504	ld	14,8*5(4)
1505	adde	27,27,15
1506	ld	15,8*6(4)
1507	adde	28,28,16
1508	ld	16,8*7(4)
1509	adde	29,29,17
1510	ldu	17,8*8(4)
1511
1512	b	.Lsqr8x_tail
1513
1514.align	5
# End of a reduction round. r7 = saved r7 value, r21 = end-of-t marker,
# r30 = r5 + 64 for the completion test. addic r31,r31,-1 turns the saved
# top-most carry into CA; the accumulator plus the loaded t limbs is then
# stored, the new top-most carry is kept in r20 and saved at 8*10(r1),
# n[0..7] is reloaded from r20, the next eight limbs of t become the
# accumulator, and r31 = r7 * (first of them) is the next factor.
# CR0 from cmpld r30,r21 survives to the bne at the end.
1515.Lsqr8x_tail_break:
1516	ld	7,8*8(1)
1517	ld	21,8*9(1)
1518	addi	30,5,8*8
1519
1520	addic	31,31,-1
1521	adde	18,22,9
1522	ld	22,8*8(3)
1523	ld	9,8*1(20)
1524	adde	19,23,10
1525	ld	23,8*9(3)
1526	ld	10,8*2(20)
1527	adde	24,24,11
1528	ld	11,8*3(20)
1529	adde	25,25,12
1530	ld	12,8*4(20)
1531	adde	26,26,14
1532	ld	14,8*5(20)
1533	adde	27,27,15
1534	ld	15,8*6(20)
1535	adde	28,28,16
1536	ld	16,8*7(20)
1537	adde	29,29,17
1538	ld	17,8*8(20)
1539	addi	4,20,8*8
1540	addze	20,0
1541	mulld	31,7,22
1542	std	18,8*1(5)
1543	cmpld	30,21
1544	std	19,8*2(5)
1545	li	30,8
1546	std	24,8*3(5)
1547	ld	24,8*10(3)
1548	std	25,8*4(5)
1549	ld	25,8*11(3)
1550	std	26,8*5(5)
1551	ld	26,8*12(3)
1552	std	27,8*6(5)
1553	ld	27,8*13(3)
1554	std	28,8*7(5)
1555	ld	28,8*14(3)
1556	std	29,8*8(5)
1557	ld	29,8*15(3)
1558	std	20,8*10(1)
1559	addi	5,3,8*7
1560	mtctr	30
1561	bne	.Lsqr8x_reduction
1562
1563
1564
1565
1566
1567
# ---- Final subtraction: result = (upper half of t) - n, eight limbs per
# iteration. ----
# r3 = r6 = saved (biased) result pointer; r30 = (num*8 >> 6) - 1 loop trips;
# r7 keeps the current t pointer, r5 moves eight limbs ahead of it;
# r31 = top-most carry.
1568	ld	3,8*6(1)
1569	srwi	30,8,6
1570	mr	7,5
1571	addi	5,5,8*8
1572	subi	30,30,1
1573	subfc	18,9,22
1574	subfe	19,10,23
1575	mr	31,20
1576	mr	6,3
1577
1578	mtctr	30
1579	b	.Lsqr8x_sub
1580
1581.align	5
1582.Lsqr8x_sub:
1583	ld	9,8*1(4)
1584	ld	22,8*1(5)
1585	ld	10,8*2(4)
1586	ld	23,8*2(5)
1587	subfe	20,11,24
1588	ld	11,8*3(4)
1589	ld	24,8*3(5)
1590	subfe	21,12,25
1591	ld	12,8*4(4)
1592	ld	25,8*4(5)
1593	std	18,8*1(3)
1594	subfe	18,14,26
1595	ld	14,8*5(4)
1596	ld	26,8*5(5)
1597	std	19,8*2(3)
1598	subfe	19,15,27
1599	ld	15,8*6(4)
1600	ld	27,8*6(5)
1601	std	20,8*3(3)
1602	subfe	20,16,28
1603	ld	16,8*7(4)
1604	ld	28,8*7(5)
1605	std	21,8*4(3)
1606	subfe	21,17,29
1607	ldu	17,8*8(4)
1608	ldu	29,8*8(5)
1609	std	18,8*5(3)
1610	subfe	18,9,22
1611	std	19,8*6(3)
1612	subfe	19,10,23
1613	std	20,8*7(3)
1614	stdu	21,8*8(3)
1615	bdnz	.Lsqr8x_sub
1616
# Last eight limbs of the subtraction, interleaved with loading the first
# group for the select loop: r9-r12 = stored differences (via r6),
# r22-r25 = limbs of t (via r7). r30 = num/4 - 1 trips for the select loop.
# subfe r31,r0,r31 gives r31 = r31 - 1 + CA: all-ones when the top-most
# carry was 0 and the subtraction borrowed, otherwise 0 for carry/no-borrow.
1617	srwi	30,8,5
1618	ld	9,8*1(6)
1619	ld	22,8*1(7)
1620	subi	30,30,1
1621	ld	10,8*2(6)
1622	ld	23,8*2(7)
1623	subfe	20,11,24
1624	ld	11,8*3(6)
1625	ld	24,8*3(7)
1626	subfe	21,12,25
1627	ld	12,8*4(6)
1628	ldu	25,8*4(7)
1629	std	18,8*1(3)
1630	subfe	18,14,26
1631	std	19,8*2(3)
1632	subfe	19,15,27
1633	std	20,8*3(3)
1634	subfe	20,16,28
1635	std	21,8*4(3)
1636	subfe	21,17,29
1637	std	18,8*5(3)
1638	subfe	31,0,31
1639	std	19,8*6(3)
1640	std	20,8*7(3)
1641	std	21,8*8(3)
1642
1643	addi	5,1,8*11
1644	mtctr	30
1645
# Branch-free select, four limbs per iteration:
# result = (t & r31) | (difference & ~r31). Zeros are written over t both
# behind r7 (the limbs just consumed) and from the start of t through r5.
1646.Lsqr4x_cond_copy:
1647	andc	9,9,31
1648	std	0,-8*3(7)
1649	and	22,22,31
1650	std	0,-8*2(7)
1651	andc	10,10,31
1652	std	0,-8*1(7)
1653	and	23,23,31
1654	std	0,-8*0(7)
1655	andc	11,11,31
1656	std	0,8*1(5)
1657	and	24,24,31
1658	std	0,8*2(5)
1659	andc	12,12,31
1660	std	0,8*3(5)
1661	and	25,25,31
1662	stdu	0,8*4(5)
1663	or	18,9,22
1664	ld	9,8*5(6)
1665	ld	22,8*1(7)
1666	or	19,10,23
1667	ld	10,8*6(6)
1668	ld	23,8*2(7)
1669	or	20,11,24
1670	ld	11,8*7(6)
1671	ld	24,8*3(7)
1672	or	21,12,25
1673	ld	12,8*8(6)
1674	ldu	25,8*4(7)
1675	std	18,8*1(6)
1676	std	19,8*2(6)
1677	std	20,8*3(6)
1678	stdu	21,8*4(6)
1679	bdnz	.Lsqr4x_cond_copy
1680
# Last group of the select; r4 = old sp (read back from the frame base) for
# the epilogue.
1681	ld	4,0(1)
1682	andc	9,9,31
1683	and	22,22,31
1684	andc	10,10,31
1685	and	23,23,31
1686	andc	11,11,31
1687	and	24,24,31
1688	andc	12,12,31
1689	and	25,25,31
1690	or	18,9,22
1691	or	19,10,23
1692	or	20,11,24
1693	or	21,12,25
1694	std	18,8*1(6)
1695	std	19,8*2(6)
1696	std	20,8*3(6)
1697	std	21,8*4(6)
1698
1699	b	.Lsqr8x_done
1700
1701.align	5
# Short path for exactly eight limbs: everything is still in registers.
# r3 = saved (biased) result pointer, r4 = old sp, r31 = pending carry.
# Subtract n[0..7] from the accumulator (zeroing sixteen slots of t,
# 8*12..8*27(r1), on the way), turn the final borrow into a mask
# (r31 = r31 - 1 + CA), and add n & mask back so that the subtraction is
# undone when it borrowed.
1702.Lsqr8x8_post_condition:
1703	ld	3,8*6(1)
1704	ld	4,0(1)
1705	addze	31,0
1706
1707
1708	subfc	22,9,22
1709	subfe	23,10,23
1710	std	0,8*12(1)
1711	std	0,8*13(1)
1712	subfe	24,11,24
1713	std	0,8*14(1)
1714	std	0,8*15(1)
1715	subfe	25,12,25
1716	std	0,8*16(1)
1717	std	0,8*17(1)
1718	subfe	26,14,26
1719	std	0,8*18(1)
1720	std	0,8*19(1)
1721	subfe	27,15,27
1722	std	0,8*20(1)
1723	std	0,8*21(1)
1724	subfe	28,16,28
1725	std	0,8*22(1)
1726	std	0,8*23(1)
1727	subfe	29,17,29
1728	std	0,8*24(1)
1729	std	0,8*25(1)
1730	subfe	31,0,31
1731	std	0,8*26(1)
1732	std	0,8*27(1)
1733
1734	and	9,9,31
1735	and	10,10,31
1736	addc	22,22,9
1737	and	11,11,31
1738	adde	23,23,10
1739	and	12,12,31
1740	adde	24,24,11
1741	and	14,14,31
1742	adde	25,25,12
1743	and	15,15,31
1744	adde	26,26,14
1745	and	16,16,31
1746	adde	27,27,15
1747	and	17,17,31
1748	adde	28,28,16
1749	adde	29,29,17
1750	std	22,8*1(3)
1751	std	23,8*2(3)
1752	std	24,8*3(3)
1753	std	25,8*4(3)
1754	std	26,8*5(3)
1755	std	27,8*6(3)
1756	std	28,8*7(3)
1757	std	29,8*8(3)
1758
# Common exit: zero the saved r7 value (8*8(r1)) and the top-most carry
# (8*10(r1)), return 1, restore r14-r31 relative to the old sp in r4, and pop
# the frame.
1759.Lsqr8x_done:
1760	std	0,8*8(1)
1761	std	0,8*10(1)
1762
1763	ld	14,-8*18(4)
1764	li	3,1
1765	ld	15,-8*17(4)
1766	ld	16,-8*16(4)
1767	ld	17,-8*15(4)
1768	ld	18,-8*14(4)
1769	ld	19,-8*13(4)
1770	ld	20,-8*12(4)
1771	ld	21,-8*11(4)
1772	ld	22,-8*10(4)
1773	ld	23,-8*9(4)
1774	ld	24,-8*8(4)
1775	ld	25,-8*7(4)
1776	ld	26,-8*6(4)
1777	ld	27,-8*5(4)
1778	ld	28,-8*4(4)
1779	ld	29,-8*3(4)
1780	ld	30,-8*2(4)
1781	ld	31,-8*1(4)
1782	mr	1,4
1783	blr
# NOTE(review): the words below look like a traceback-table style trailer
# placed after the return; they are data, not instructions - confirm.
1784.long	0
1785.byte	0,12,4,0x20,0x80,18,6,0
1786.long	0
1787.size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
1788.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1789.align	2
1790