@ SHA-1 block transform for ARMv4 and later (ARM mode, GNU as syntax,
@ passed through the C preprocessor).
@
@ sha1_block_data_order(r0, r1, r2)
@   r0  pointer to the five 32-bit chaining words; they are read once at
@       entry and written back after every block
@   r1  pointer to the input, consumed in 64-byte blocks
@   r2  number of 64-byte blocks; must be at least 1, because the end
@       test (r1 == end pointer) is made only after a block was processed
@
@ Register roles inside the function:
@   r3-r7    working variables A,B,C,D,E; the roles move on by one
@            register every round, and the registers acting as C, D and E
@            are held rotated right by 30 bits (hence the ror#2 at each use)
@   r8       round constant K for the current group of 20 rounds
@   r9       message schedule word X[i]
@   r10-r12  scratch
@   r14      descending pointer to the most recently stored X[] word
@   sp       doubles as loop bound: each loop ends when r14 reaches sp
@
@ Stack: r4-r12 and lr are saved at entry; 80 words of X[] are carved out
@ below them in steps of 15+5+20+20+20 words and released at .L_done.

#include "arm_arch.h"

.text

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}

@ Per-block setup: r14 marks the top of X[], C/D/E go into rotated form.
.Lloop:
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]

@ Rounds 0..14, five rounds per pass, three passes.
@ Each round fetches one big-endian input word into r9 (byte by byte on
@ pre-v7 builds, word load plus rev on little-endian v7 builds), pushes
@ it onto X[] and adds K + ROL(A,5) + X[i] + ((C^D)&B)^D into E.
.L_00_15:
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	eor	r10,r4,r5			@ F_xx_xx
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	eor	r10,r3,r4			@ F_xx_xx
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	eor	r10,r7,r3			@ F_xx_xx
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	eor	r10,r6,r7			@ F_xx_xx
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
	teq	r14,sp
	bne	.L_00_15		@ [((11+4)*5+2)*3]

@ Round 15 (last word taken from the input), then rounds 16..19.
@ From round 16 on X[i] is derived from earlier words.  r14 points at
@ X[i-1] and X[] grows downwards, so [r14,#k*4] is X[i-1-k]: the offsets
@ 15, 13, 7 and 2 fetch X[i-16], X[i-14], X[i-8] and X[i-3].  Their XOR
@ is rotated left by one (ror#31) and stored as the new X[i].
	sub	sp,sp,#5*4
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)

@ Rounds 20..39 and 60..79 share one body (F = B^C^D, only K differs).
@ The carry flag records which pass is running: cmn sp,#0 clears it for
@ 20..39, cmp sp,#0 sets it for 60..79, and the teq used as loop test
@ leaves it untouched, so bcs after the loop tells the two passes apart.
	ldr	r8,.LK_20_39		@ [+15+16*4]
	sub	sp,sp,#20*4
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
.L_20_39_or_60_79:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r4,r10,ror#2		@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r3,r10,ror#2		@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r7,r10,ror#2		@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r6,r10,ror#2		@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r5,r10,ror#2		@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
	teq	r14,sp			@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

@ Rounds 40..59.  F is added as two separate terms, (C^D)&B in r10 and
@ C&D in r11; the terms never have a bit in common, so adding them gives
@ the same value as OR-ing them.
	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]
.L_40_59:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r4,r10,ror#2		@ F_xx_xx
	and	r11,r5,r6			@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
	and	r11,r4,r5			@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
	and	r11,r3,r4			@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
	and	r11,r7,r3			@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
	and	r11,r6,r7			@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	teq	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

@ Re-enter the shared body for rounds 60..79 with the carry flag set.
	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes

@ Block finished: release X[], add the working variables into the
@ chaining words (undoing the rotation kept on C, D and E) and go round
@ again until r1 has reached the end pointer in r2.
.L_done:
	add	sp,sp,#80*4		@ "deallocate" stack frame
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2
	bne	.Lloop			@ [+18], total 1307

@ Return.  On pre-v5 builds a return address with bit 0 clear is taken
@ with mov pc,lr; otherwise control falls into the literal word
@ 0xe12fff1e, which is the ARM encoding of bx lr.
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif

@ Round constants, one per group of 20 rounds, loaded pc-relative above.
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align	2