• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This code was translated into a form compatible with 5a from the public
6// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
7
8// +build arm,!gccgo,!appengine
9
// Table of five 32-bit masks, one per limb. poly1305_init_ext_armv6 loads all
// five and ANDs them with the limbs it extracts from the first four key words.
10DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
11DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
12DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
13DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
14DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
15GLOBL poly1305_init_constants_armv6<>(SB), 8, $20 // 20 bytes; flag 8 is RODATA in Go's textflag.h
16
17// Warning: the linker may use R11 to synthesize certain instructions. Please
18// take care and verify that no synthetic instructions use it.
19
// poly1305_init_ext_armv6 fills in the state block used by the other routines
// in this file.
//
// In:  R0 = state pointer, R1 = key pointer (eight words are loaded from it).
// Out: state words 0-4  = masked limbs built from key words 0-3
//      state words 5-9  = 0
//      state words 10-13 = key words 4-7
//      state word 14    = 0
// R4-R11 (including g) are pushed on entry and popped on exit. R0-R3 and R12
// are overwritten; R0 ends up advanced by 40 bytes and R1 by 32.
// Flag 4 is NOSPLIT in Go's textflag.h; the routine adjusts R13 itself.
20TEXT poly1305_init_ext_armv6<>(SB),4,$-4
21  MOVM.DB.W [R4-R11], (R13) // push R4-R11
22  MOVM.IA.W (R1), [R2-R5] // R2-R5 = key words 0-3; R1 += 16
23  MOVW $poly1305_init_constants_armv6<>(SB), R7 // R7 = address of the mask table
24  MOVW R2, R8 // limb 0 source = word 0 (masked below)
25  MOVW R2>>26, R9
26  MOVW R3>>20, g
27  MOVW R4>>14, R11
28  MOVW R5>>8, R12 // limb 4 source = word3 >> 8
29  ORR R3<<6, R9, R9 // limb 1 source = word0>>26 | word1<<6
30  ORR R4<<12, g, g // limb 2 source = word1>>20 | word2<<12
31  ORR R5<<18, R11, R11 // limb 3 source = word2>>14 | word3<<18
32  MOVM.IA (R7), [R2-R6] // R2-R6 = the five masks
33  AND R8, R2, R2 // apply one mask per limb
34  AND R9, R3, R3
35  AND g, R4, R4
36  AND R11, R5, R5
37  AND R12, R6, R6
38  MOVM.IA.W [R2-R6], (R0) // state words 0-4 = masked limbs; R0 += 20
39  EOR R2, R2, R2 // R2-R6 = 0
40  EOR R3, R3, R3
41  EOR R4, R4, R4
42  EOR R5, R5, R5
43  EOR R6, R6, R6
44  MOVM.IA.W [R2-R6], (R0) // state words 5-9 = 0; R0 += 20
45  MOVM.IA.W (R1), [R2-R5] // R2-R5 = key words 4-7
46  MOVM.IA [R2-R6], (R0) // state words 10-13 = key words 4-7; word 14 = 0 (R6 is still zero)
47  MOVM.IA.W (R13), [R4-R11] // pop R4-R11
48  RET
49
// MOVW_UNALIGNED copies the four bytes at offset..offset+3 from Rsrc to the
// same offsets at Rdst, one byte at a time through Rtmp, so neither address
// has to be word aligned. Rsrc and Rdst are not modified; Rtmp is overwritten.
50#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
51  MOVBU (offset+0)(Rsrc), Rtmp; \
52  MOVBU Rtmp, (offset+0)(Rdst); \
53  MOVBU (offset+1)(Rsrc), Rtmp; \
54  MOVBU Rtmp, (offset+1)(Rdst); \
55  MOVBU (offset+2)(Rsrc), Rtmp; \
56  MOVBU Rtmp, (offset+2)(Rdst); \
57  MOVBU (offset+3)(Rsrc), Rtmp; \
58  MOVBU Rtmp, (offset+3)(Rdst)
59
// poly1305_blocks_armv6 absorbs whole 16-byte blocks into the accumulator.
//
// In:  R0 = state pointer, R1 = message pointer, R2 = byte count.
// One loop iteration runs per 16 bytes for as long as at least 16 bytes remain
// (none at all if R2 < 16). On exit the five accumulator limbs are written to
// state words 5-9.
//
// Naming used in the comments below: r0-r4 are state words 0-4, h0-h4 are
// state words 5-9 (the accumulator), d0-d4 are the 64-bit column sums of the
// product. All limbs are masked to 26 bits with BIC $0xfc000000.
//
// If state word 14 is zero, 1<<24 is ORed into the top limb of every block;
// if it is nonzero nothing is ORed in.
//
// Stack: R4-R9, g, R11 and R14 are pushed (36 bytes), then 128 bytes of
// scratch are reserved and addressed from R13:
//    0-28   d1, d2, d3, d4 as (low word, high word) pairs
//    32     value ORed into the top limb (0 or 1<<24)
//    36     state pointer
//    40     current message pointer
//    44     bytes remaining
//    48-60  bounce buffer for a block whose address is not word aligned
//    64-80  copy of r0-r4
// NOTE(review): three accesses use name+off(SP) instead of off(R13). The code
// only works if both forms address the same slot under the $-4 frame, which
// is what the surrounding loads and stores assume; confirm with the
// assembler's frame rules.
60TEXT poly1305_blocks_armv6<>(SB),4,$-4
61  MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) // push nine registers
62  SUB $128, R13 // reserve the scratch area
63  MOVW R0, 36(R13) // save state pointer
64  MOVW R1, 40(R13) // save message pointer
65  MOVW R2, 44(R13) // save byte count
66  MOVW R1, R14 // R14 = message pointer
67  MOVW R2, R12 // R12 = byte count
68  MOVW 56(R0), R8 // R8 = state word 14
69  WORD $0xe1180008 // TST R8, R8 not working see issue 5921
70  EOR R6, R6, R6 // R6 = 0 (flags from the TST are left alone)
71  MOVW.EQ $(1<<24), R6 // state word 14 == 0: R6 = 1<<24
72  MOVW R6, 32(R13) // keep it for the loop
73  ADD $64, R13, g // g = address of the r0-r4 copy
74  MOVM.IA (R0), [R0-R9] // R0-R4 = r0-r4, R5-R9 = h0-h4
75  MOVM.IA [R0-R4], (g) // store r0-r4 at 64(R13)
76  CMP $16, R12
77  BLO poly1305_blocks_armv6_done // fewer than 16 bytes: nothing to absorb
78poly1305_blocks_armv6_mainloop:
79  WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
80  BEQ poly1305_blocks_armv6_mainloop_aligned
81  ADD $48, R13, g // unaligned: copy the block bytewise into the bounce buffer at 48(R13)
82  MOVW_UNALIGNED(R14, g, R0, 0)
83  MOVW_UNALIGNED(R14, g, R0, 4)
84  MOVW_UNALIGNED(R14, g, R0, 8)
85  MOVW_UNALIGNED(R14, g, R0, 12)
86  MOVM.IA (g), [R0-R3] // R0-R3 = the four block words
87  ADD $16, R14 // advance the message pointer
88  B poly1305_blocks_armv6_mainloop_loaded
89poly1305_blocks_armv6_mainloop_aligned:
90  MOVM.IA.W (R14), [R0-R3] // R0-R3 = the four block words; R14 += 16
91poly1305_blocks_armv6_mainloop_loaded:
92  MOVW R0>>26, g // split the 128-bit block into five limbs: R0, g, R11, R12, R4
93  MOVW R1>>20, R11
94  MOVW R2>>14, R12
95  MOVW R14, 40(R13) // save the advanced message pointer; R14 is reused below
96  MOVW R3>>8, R4 // top limb = word3 >> 8
97  ORR R1<<6, g, g
98  ORR R2<<12, R11, R11
99  ORR R3<<18, R12, R12
100  BIC $0xfc000000, R0, R0 // keep the low 26 bits of each limb
101  BIC $0xfc000000, g, g
102  MOVW 32(R13), R3 // R3 = 0 or 1<<24, chosen at entry
103  BIC $0xfc000000, R11, R11
104  BIC $0xfc000000, R12, R12
105  ADD R0, R5, R5 // h0 += limb 0
106  ADD g, R6, R6 // h1 += limb 1
107  ORR R3, R4, R4 // top limb |= R3
108  ADD R11, R7, R7 // h2 += limb 2
109  ADD $64, R13, R14 // R14 = address of the r0-r4 copy
110  ADD R12, R8, R8 // h3 += limb 3
111  ADD R4, R9, R9 // h4 += limb 4
112  MOVM.IA (R14), [R0-R4] // R0-R4 = r0-r4
113  MULLU R4, R5, (R11, g) // R11:g   accumulates d4 = r4*h0 + r3*h1 + r2*h2 + r1*h3 + r0*h4
114  MULLU R3, R5, (R14, R12) // R14:R12 accumulates d3 = r3*h0 + r2*h1 + r1*h2 + r0*h3 + 5*r4*h4
115  MULALU R3, R6, (R11, g)
116  MULALU R2, R6, (R14, R12)
117  MULALU R2, R7, (R11, g)
118  MULALU R1, R7, (R14, R12)
119  ADD R4<<2, R4, R4 // R4 = 5*r4
120  ADD R3<<2, R3, R3 // R3 = 5*r3
121  MULALU R1, R8, (R11, g)
122  MULALU R0, R8, (R14, R12)
123  MULALU R0, R9, (R11, g)
124  MULALU R4, R9, (R14, R12)
125  MOVW g, 24(R13) // spill d4 (low, high)
126  MOVW R11, 28(R13)
127  MOVW R12, 16(R13) // spill d3 (low, high)
128  MOVW R14, 20(R13)
129  MULLU R2, R5, (R11, g) // R11:g   accumulates d2 = r2*h0 + r1*h1 + r0*h2 + 5*r4*h3 + 5*r3*h4
130  MULLU R1, R5, (R14, R12) // R14:R12 accumulates d1 = r1*h0 + r0*h1 + 5*r4*h2 + 5*r3*h3 + 5*r2*h4
131  MULALU R1, R6, (R11, g)
132  MULALU R0, R6, (R14, R12)
133  MULALU R0, R7, (R11, g)
134  MULALU R4, R7, (R14, R12)
135  ADD R2<<2, R2, R2 // R2 = 5*r2
136  ADD R1<<2, R1, R1 // R1 = 5*r1
137  MULALU R4, R8, (R11, g)
138  MULALU R3, R8, (R14, R12)
139  MULALU R3, R9, (R11, g)
140  MULALU R2, R9, (R14, R12)
141  MOVW g, 8(R13) // spill d2 (low, high)
142  MOVW R11, 12(R13)
143  MOVW R12, 0(R13) // spill d1 low word
144  MOVW R14, w+4(SP) // spill d1 high word; the MOVM.IA (R13) below reads it back as the word at 4(R13)
145  MULLU R0, R5, (R11, g) // R11:g = d0 = r0*h0 + 5*r4*h1 + 5*r3*h2 + 5*r2*h3 + 5*r1*h4
146  MULALU R4, R6, (R11, g)
147  MULALU R3, R7, (R11, g)
148  MULALU R2, R8, (R11, g)
149  MULALU R1, R9, (R11, g)
150  MOVM.IA (R13), [R0-R7] // R1:R0 = d1, R3:R2 = d2, R5:R4 = d3, R7:R6 = d4
151  MOVW g>>26, R12
152  MOVW R4>>26, R14
153  ORR R11<<6, R12, R12 // R12 = low 32 bits of d0 >> 26
154  ORR R5<<6, R14, R14 // R14 = low 32 bits of d3 >> 26
155  BIC $0xfc000000, g, g // g = low 26 bits of d0
156  BIC $0xfc000000, R4, R4 // R4 = low 26 bits of d3
157  ADD.S R12, R0, R0 // d1 += carry out of d0 (64-bit add)
158  ADC $0, R1, R1
159  ADD.S R14, R6, R6 // d4 += carry out of d3 (64-bit add)
160  ADC $0, R7, R7
161  MOVW R0>>26, R12
162  MOVW R6>>26, R14
163  ORR R1<<6, R12, R12 // R12 = carry out of d1
164  ORR R7<<6, R14, R14 // R14 = carry out of d4
165  BIC $0xfc000000, R0, R0 // R0 = low 26 bits of d1
166  BIC $0xfc000000, R6, R6 // R6 = low 26 bits of d4
167  ADD R14<<2, R14, R14 // carry out of the top column, times 5 ...
168  ADD.S R12, R2, R2 // d2 += carry out of d1 (64-bit add)
169  ADC $0, R3, R3
170  ADD R14, g, g // ... is added back into limb 0
171  MOVW R2>>26, R12
172  MOVW g>>26, R14 // R14 = carry out of limb 0
173  ORR R3<<6, R12, R12 // R12 = carry out of d2
174  BIC $0xfc000000, g, R5 // new h0
175  BIC $0xfc000000, R2, R7 // new h2
176  ADD R12, R4, R4 // limb 3 += carry out of d2
177  ADD R14, R0, R0 // limb 1 += carry out of limb 0
178  MOVW R4>>26, R12 // R12 = carry out of limb 3
179  BIC $0xfc000000, R4, R8 // new h3
180  ADD R12, R6, R9 // new h4 = limb 4 + carry out of limb 3
181  MOVW w+44(SP), R12 // R12 = bytes remaining; presumably the slot written as 44(R13) (see NOTE above)
182  MOVW w+40(SP), R14 // R14 = message pointer; presumably the slot written as 40(R13)
183  MOVW R0, R6 // new h1
184  CMP $32, R12 // sets the flags consumed by BHS: is another whole block left after this one?
185  SUB $16, R12, R12 // account for the block just absorbed (does not touch the flags)
186  MOVW R12, 44(R13)
187  BHS poly1305_blocks_armv6_mainloop
188poly1305_blocks_armv6_done:
189  MOVW 36(R13), R12 // R12 = state pointer
190  MOVW R5, 20(R12) // write h0-h4 back to state words 5-9
191  MOVW R6, 24(R12)
192  MOVW R7, 28(R12)
193  MOVW R8, 32(R12)
194  MOVW R9, 36(R12)
195  ADD $128, R13, R13 // release the scratch area
196  MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] // pop the saved registers
197  RET
198
// MOVHUP_UNALIGNED copies two bytes from (Rsrc) to (Rdst) one byte at a time
// through Rtmp. Each access uses the .P (post-increment) form with a step of
// 1, so both pointers end up advanced by 2.
199#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
200  MOVBU.P 1(Rsrc), Rtmp; \
201  MOVBU.P Rtmp, 1(Rdst); \
202  MOVBU.P 1(Rsrc), Rtmp; \
203  MOVBU.P Rtmp, 1(Rdst)
204
// MOVWP_UNALIGNED copies four bytes the same way (two MOVHUP_UNALIGNED
// expansions), leaving both pointers advanced by 4.
205#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
206  MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
207  MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
208
// poly1305_finish_ext_armv6 absorbs any leftover bytes, finishes the
// reduction of the accumulator, adds state words 10-13 and writes four result
// words to the output. The whole 64-byte state is zeroed before returning.
//
// In:  R0 = state pointer, R1 = pointer to the leftover bytes,
//      R2 = leftover byte count, R3 = output pointer.
// Only bits 0-3 of R2 select what gets copied, so the count is expected to be
// below 16 (poly1305_auth_armv6 always passes such a value).
//
// When R2 != 0 the leftover bytes are copied into a zeroed 16-byte stack
// block, a byte of value 1 is stored right after them, state word 14 is set to
// 1 and poly1305_blocks_armv6 is called on that block.
//
// Stack: R4-R9, g, R11 and R14 are pushed, plus 16 bytes for the padded block.
209TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
210  MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13) // push nine registers
211  SUB $16, R13, R13 // 16-byte block for the padded tail
212  MOVW R0, R5 // R5 = state pointer
213  MOVW R1, R6 // R6 = leftover pointer (copy)
214  MOVW R2, R7 // R7 = leftover count (copy)
215  MOVW R3, R8 // R8 = output pointer
216  AND.S R2, R2, R2 // set flags from the count
217  BEQ poly1305_finish_ext_armv6_noremaining // count == 0: no partial block
218  EOR R0, R0 // R0 = 0
219  MOVW R13, R9 // R9 = write cursor into the stack block
220  MOVW R0, 0(R13) // zero the 16-byte block
221  MOVW R0, 4(R13)
222  MOVW R0, 8(R13)
223  MOVW R0, 12(R13)
224  WORD $0xe3110003 // TST R1, #3 not working see issue 5921
225  BEQ poly1305_finish_ext_armv6_aligned
226  WORD $0xe3120008 // TST R2, #8 not working see issue 5921
227  BEQ poly1305_finish_ext_armv6_skip8
228  MOVWP_UNALIGNED(R1, R9, g)
229  MOVWP_UNALIGNED(R1, R9, g)
230poly1305_finish_ext_armv6_skip8:
231  WORD $0xe3120004 // TST $4, R2 not working see issue 5921
232  BEQ poly1305_finish_ext_armv6_skip4
233  MOVWP_UNALIGNED(R1, R9, g)
234poly1305_finish_ext_armv6_skip4:
235  WORD $0xe3120002 // TST $2, R2 not working see issue 5921
236  BEQ poly1305_finish_ext_armv6_skip2
237  MOVHUP_UNALIGNED(R1, R9, g)
238  B poly1305_finish_ext_armv6_skip2 // rejoin the shared single-byte tail
239poly1305_finish_ext_armv6_aligned:
240  WORD $0xe3120008 // TST R2, #8 not working see issue 5921
241  BEQ poly1305_finish_ext_armv6_skip8_aligned
242  MOVM.IA.W (R1), [g-R11] // copy 8 bytes as two words; R1 += 8
243  MOVM.IA.W [g-R11], (R9) // R9 += 8
244poly1305_finish_ext_armv6_skip8_aligned:
245  WORD $0xe3120004 // TST $4, R2 not working see issue 5921
246  BEQ poly1305_finish_ext_armv6_skip4_aligned
247  MOVW.P 4(R1), g // copy 4 bytes as one word, post-incrementing both pointers
248  MOVW.P g, 4(R9)
249poly1305_finish_ext_armv6_skip4_aligned:
250  WORD $0xe3120002 // TST $2, R2 not working see issue 5921
251  BEQ poly1305_finish_ext_armv6_skip2
252  MOVHU.P 2(R1), g // copy 2 bytes as one halfword
253  MOVH.P g, 2(R9)
254poly1305_finish_ext_armv6_skip2:
255  WORD $0xe3120001 // TST $1, R2 not working see issue 5921
256  BEQ poly1305_finish_ext_armv6_skip1
257  MOVBU.P 1(R1), g // copy the last single byte
258  MOVBU.P g, 1(R9)
259poly1305_finish_ext_armv6_skip1:
260  MOVW $1, R11
261  MOVBU R11, 0(R9) // byte value 1 right after the copied bytes; the rest of the block stays zero
262  MOVW R11, 56(R5) // state word 14 = 1, so poly1305_blocks_armv6 ORs nothing into this block's top limb
263  MOVW R5, R0 // poly1305_blocks_armv6(state, stack block, 16)
264  MOVW R13, R1
265  MOVW $16, R2
266  BL poly1305_blocks_armv6<>(SB)
267poly1305_finish_ext_armv6_noremaining:
268  MOVW 20(R5), R0 // R0-R4 = h0-h4 (state words 5-9)
269  MOVW 24(R5), R1
270  MOVW 28(R5), R2
271  MOVW 32(R5), R3
272  MOVW 36(R5), R4
273  MOVW R4>>26, R12 // carry out of h4 ...
274  BIC $0xfc000000, R4, R4
275  ADD R12<<2, R12, R12 // ... times 5 ...
276  ADD R12, R0, R0 // ... is added to h0
277  MOVW R0>>26, R12 // ripple the carries h0 -> h1 -> h2 -> h3 -> h4
278  BIC $0xfc000000, R0, R0
279  ADD R12, R1, R1
280  MOVW R1>>26, R12
281  BIC $0xfc000000, R1, R1
282  ADD R12, R2, R2
283  MOVW R2>>26, R12
284  BIC $0xfc000000, R2, R2
285  ADD R12, R3, R3
286  MOVW R3>>26, R12
287  BIC $0xfc000000, R3, R3
288  ADD R12, R4, R4
289  ADD $5, R0, R6 // candidate (R6, R7, g, R11, R14) = h + 5, then minus 1<<26 in the top limb
290  MOVW R6>>26, R12
291  BIC $0xfc000000, R6, R6
292  ADD R12, R1, R7
293  MOVW R7>>26, R12
294  BIC $0xfc000000, R7, R7
295  ADD R12, R2, g
296  MOVW g>>26, R12
297  BIC $0xfc000000, g, g
298  ADD R12, R3, R11
299  MOVW $-(1<<26), R12
300  ADD R11>>26, R12, R12 // carry out of limb 3, minus 1<<26
301  BIC $0xfc000000, R11, R11
302  ADD R12, R4, R14 // candidate top limb = h4 + carry - (1<<26)
303  MOVW R14>>31, R12 // 1 if the candidate top limb has its sign bit set, else 0
304  SUB $1, R12 // mask: 0 if the sign bit was set, all ones otherwise
305  AND R12, R6, R6 // candidate &= mask
306  AND R12, R7, R7
307  AND R12, g, g
308  AND R12, R11, R11
309  AND R12, R14, R14
310  MVN R12, R12 // inverted mask
311  AND R12, R0, R0 // h &= ~mask
312  AND R12, R1, R1
313  AND R12, R2, R2
314  AND R12, R3, R3
315  AND R12, R4, R4
316  ORR R6, R0, R0 // merge: selects h or the candidate without a branch
317  ORR R7, R1, R1
318  ORR g, R2, R2
319  ORR R11, R3, R3
320  ORR R14, R4, R4
321  ORR R1<<26, R0, R0 // repack the five limbs into four 32-bit words R0-R3
322  MOVW R1>>6, R1
323  ORR R2<<20, R1, R1
324  MOVW R2>>12, R2
325  ORR R3<<14, R2, R2
326  MOVW R3>>18, R3
327  ORR R4<<8, R3, R3
328  MOVW 40(R5), R6 // R6, R7, g, R11 = state words 10-13
329  MOVW 44(R5), R7
330  MOVW 48(R5), g
331  MOVW 52(R5), R11
332  ADD.S R6, R0, R0 // four-word add with carry chained through ADC.S
333  ADC.S R7, R1, R1
334  ADC.S g, R2, R2
335  ADC.S R11, R3, R3
336  MOVM.IA [R0-R3], (R8) // write the four result words to the output
337  MOVW R5, R12 // R12 = state pointer (R5 is zeroed below)
338  EOR R0, R0, R0 // R0-R7 = 0
339  EOR R1, R1, R1
340  EOR R2, R2, R2
341  EOR R3, R3, R3
342  EOR R4, R4, R4
343  EOR R5, R5, R5
344  EOR R6, R6, R6
345  EOR R7, R7, R7
346  MOVM.IA.W [R0-R7], (R12) // zero state bytes 0-31; R12 += 32
347  MOVM.IA [R0-R7], (R12) // zero state bytes 32-63
348  ADD $16, R13, R13 // release the stack block
349  MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14] // pop the saved registers
350  RET
351
352// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
//
// Entry point called from Go. It sets up a 64-byte state block on the stack
// and then runs the three helpers in order:
//   poly1305_init_ext_armv6(state, key)             - reads 32 bytes from key
//   poly1305_blocks_armv6(state, m, mlen &^ 15)     - skipped if that is zero
//   poly1305_finish_ext_armv6(state, tail, mlen & 15, out)
// R4-R8 hold values across the calls; each helper pushes and pops them.
//
// NOTE(review): R13 is rounded down to a multiple of 64 and lowered by another
// 64, so the state block and everything the helpers push sit below the R13
// value this body starts with. Whether the declared $280 frame covers that
// region depends on where the assembler places the frame relative to R13;
// confirm.
353TEXT ·poly1305_auth_armv6(SB),0,$280-16
354  MOVW  out+0(FP), R4 // R4 = out
355  MOVW  m+4(FP), R5 // R5 = m
356  MOVW  mlen+8(FP), R6 // R6 = mlen
357  MOVW  key+12(FP), R7 // R7 = key
358
359  MOVW R13, R8 // remember R13; restored before RET
360  BIC $63, R13 // round R13 down to a multiple of 64
361  SUB $64, R13, R13 // R13 = base of a 64-byte-aligned, 64-byte state block
362  MOVW  R13, R0 // poly1305_init_ext_armv6(state, key)
363  MOVW  R7, R1
364  BL poly1305_init_ext_armv6<>(SB)
365  BIC.S $15, R6, R2 // R2 = mlen rounded down to a multiple of 16; sets flags
366  BEQ poly1305_auth_armv6_noblocks // no whole 16-byte block
367  MOVW R13, R0 // poly1305_blocks_armv6(state, m, R2)
368  MOVW R5, R1
369  ADD R2, R5, R5 // R5 = first leftover byte
370  SUB R2, R6, R6 // R6 = leftover byte count
371  BL poly1305_blocks_armv6<>(SB)
372poly1305_auth_armv6_noblocks:
373  MOVW R13, R0 // poly1305_finish_ext_armv6(state, leftover pointer, leftover count, out)
374  MOVW R5, R1
375  MOVW R6, R2
376  MOVW R4, R3
377  BL poly1305_finish_ext_armv6<>(SB)
378  MOVW R8, R13 // restore R13
379  RET
380