// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#include "ring_core_generated/prefix_symbols_asm.h"
13#include <ring-core/arm_arch.h>
14
15#if __ARM_MAX_ARCH__>=7
16.text
17
18.section	__TEXT,__const
19.align	5
20Lrcon:
21.long	0x01,0x01,0x01,0x01
22.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
23.long	0x1b,0x1b,0x1b,0x1b
24
25.text
26
27.globl	_aes_hw_set_encrypt_key
28.private_extern	_aes_hw_set_encrypt_key
29
30.align	5
31_aes_hw_set_encrypt_key:
32Lenc_key:
33	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
34	AARCH64_VALID_CALL_TARGET
35	stp	x29,x30,[sp,#-16]!
36	add	x29,sp,#0
37	mov	x3,#-1
38	cmp	x0,#0
39	b.eq	Lenc_key_abort
40	cmp	x2,#0
41	b.eq	Lenc_key_abort
42	mov	x3,#-2
43	cmp	w1,#128
44	b.lt	Lenc_key_abort
45	cmp	w1,#256
46	b.gt	Lenc_key_abort
47	tst	w1,#0x3f
48	b.ne	Lenc_key_abort
49
50	adrp	x3,Lrcon@PAGE
51	add	x3,x3,Lrcon@PAGEOFF
52	cmp	w1,#192
53
54	eor	v0.16b,v0.16b,v0.16b
55	ld1	{v3.16b},[x0],#16
56	mov	w1,#8		// reuse w1
57	ld1	{v1.4s,v2.4s},[x3],#32
58
59	b.lt	Loop128
60	// 192-bit key support was removed.
61	b	L256
62
63.align	4
64Loop128:
65	tbl	v6.16b,{v3.16b},v2.16b
66	ext	v5.16b,v0.16b,v3.16b,#12
67	st1	{v3.4s},[x2],#16
68	aese	v6.16b,v0.16b
69	subs	w1,w1,#1
70
71	eor	v3.16b,v3.16b,v5.16b
72	ext	v5.16b,v0.16b,v5.16b,#12
73	eor	v3.16b,v3.16b,v5.16b
74	ext	v5.16b,v0.16b,v5.16b,#12
75	eor	v6.16b,v6.16b,v1.16b
76	eor	v3.16b,v3.16b,v5.16b
77	shl	v1.16b,v1.16b,#1
78	eor	v3.16b,v3.16b,v6.16b
79	b.ne	Loop128
80
81	ld1	{v1.4s},[x3]
82
83	tbl	v6.16b,{v3.16b},v2.16b
84	ext	v5.16b,v0.16b,v3.16b,#12
85	st1	{v3.4s},[x2],#16
86	aese	v6.16b,v0.16b
87
88	eor	v3.16b,v3.16b,v5.16b
89	ext	v5.16b,v0.16b,v5.16b,#12
90	eor	v3.16b,v3.16b,v5.16b
91	ext	v5.16b,v0.16b,v5.16b,#12
92	eor	v6.16b,v6.16b,v1.16b
93	eor	v3.16b,v3.16b,v5.16b
94	shl	v1.16b,v1.16b,#1
95	eor	v3.16b,v3.16b,v6.16b
96
97	tbl	v6.16b,{v3.16b},v2.16b
98	ext	v5.16b,v0.16b,v3.16b,#12
99	st1	{v3.4s},[x2],#16
100	aese	v6.16b,v0.16b
101
102	eor	v3.16b,v3.16b,v5.16b
103	ext	v5.16b,v0.16b,v5.16b,#12
104	eor	v3.16b,v3.16b,v5.16b
105	ext	v5.16b,v0.16b,v5.16b,#12
106	eor	v6.16b,v6.16b,v1.16b
107	eor	v3.16b,v3.16b,v5.16b
108	eor	v3.16b,v3.16b,v6.16b
109	st1	{v3.4s},[x2]
110	add	x2,x2,#0x50
111
112	mov	w12,#10
113	b	Ldone
114
115// 192-bit key support was removed.
116
117.align	4
118L256:
119	ld1	{v4.16b},[x0]
120	mov	w1,#7
121	mov	w12,#14
122	st1	{v3.4s},[x2],#16
123
124Loop256:
125	tbl	v6.16b,{v4.16b},v2.16b
126	ext	v5.16b,v0.16b,v3.16b,#12
127	st1	{v4.4s},[x2],#16
128	aese	v6.16b,v0.16b
129	subs	w1,w1,#1
130
131	eor	v3.16b,v3.16b,v5.16b
132	ext	v5.16b,v0.16b,v5.16b,#12
133	eor	v3.16b,v3.16b,v5.16b
134	ext	v5.16b,v0.16b,v5.16b,#12
135	eor	v6.16b,v6.16b,v1.16b
136	eor	v3.16b,v3.16b,v5.16b
137	shl	v1.16b,v1.16b,#1
138	eor	v3.16b,v3.16b,v6.16b
139	st1	{v3.4s},[x2],#16
140	b.eq	Ldone
141
142	dup	v6.4s,v3.s[3]		// just splat
143	ext	v5.16b,v0.16b,v4.16b,#12
144	aese	v6.16b,v0.16b
145
146	eor	v4.16b,v4.16b,v5.16b
147	ext	v5.16b,v0.16b,v5.16b,#12
148	eor	v4.16b,v4.16b,v5.16b
149	ext	v5.16b,v0.16b,v5.16b,#12
150	eor	v4.16b,v4.16b,v5.16b
151
152	eor	v4.16b,v4.16b,v6.16b
153	b	Loop256
154
155Ldone:
156	str	w12,[x2]
157	mov	x3,#0
158
159Lenc_key_abort:
160	mov	x0,x3			// return value
161	ldr	x29,[sp],#16
162	ret
163
164.globl	_aes_hw_encrypt
165.private_extern	_aes_hw_encrypt
166
167.align	5
168_aes_hw_encrypt:
169	AARCH64_VALID_CALL_TARGET
170	ldr	w3,[x2,#240]
171	ld1	{v0.4s},[x2],#16
172	ld1	{v2.16b},[x0]
173	sub	w3,w3,#2
174	ld1	{v1.4s},[x2],#16
175
176Loop_enc:
177	aese	v2.16b,v0.16b
178	aesmc	v2.16b,v2.16b
179	ld1	{v0.4s},[x2],#16
180	subs	w3,w3,#2
181	aese	v2.16b,v1.16b
182	aesmc	v2.16b,v2.16b
183	ld1	{v1.4s},[x2],#16
184	b.gt	Loop_enc
185
186	aese	v2.16b,v0.16b
187	aesmc	v2.16b,v2.16b
188	ld1	{v0.4s},[x2]
189	aese	v2.16b,v1.16b
190	eor	v2.16b,v2.16b,v0.16b
191
192	st1	{v2.16b},[x1]
193	ret
194
195.globl	_aes_hw_ctr32_encrypt_blocks
196.private_extern	_aes_hw_ctr32_encrypt_blocks
197
198.align	5
199_aes_hw_ctr32_encrypt_blocks:
200	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
201	AARCH64_VALID_CALL_TARGET
202	stp	x29,x30,[sp,#-16]!
203	add	x29,sp,#0
204	ldr	w5,[x3,#240]
205
206	ldr	w8, [x4, #12]
207	ld1	{v0.4s},[x4]
208
209	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
210	sub	w5,w5,#4
211	mov	x12,#16
212	cmp	x2,#2
213	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
214	sub	w5,w5,#2
215	ld1	{v20.4s,v21.4s},[x7],#32
216	ld1	{v22.4s,v23.4s},[x7],#32
217	ld1	{v7.4s},[x7]
218	add	x7,x3,#32
219	mov	w6,w5
220	csel	x12,xzr,x12,lo
221
222	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
223	// affected by silicon errata #1742098 [0] and #1655431 [1],
224	// respectively, where the second instruction of an aese/aesmc
225	// instruction pair may execute twice if an interrupt is taken right
226	// after the first instruction consumes an input register of which a
227	// single 32-bit lane has been updated the last time it was modified.
228	//
229	// This function uses a counter in one 32-bit lane. The vmov lines
230	// could write to v1.16b and v18.16b directly, but that trips this bugs.
231	// We write to v6.16b and copy to the final register as a workaround.
232	//
233	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
234	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
235#ifndef __ARMEB__
236	rev	w8, w8
237#endif
238	add	w10, w8, #1
239	orr	v6.16b,v0.16b,v0.16b
240	rev	w10, w10
241	mov	v6.s[3],w10
242	add	w8, w8, #2
243	orr	v1.16b,v6.16b,v6.16b
244	b.ls	Lctr32_tail
245	rev	w12, w8
246	mov	v6.s[3],w12
247	sub	x2,x2,#3		// bias
248	orr	v18.16b,v6.16b,v6.16b
249	b	Loop3x_ctr32
250
251.align	4
252Loop3x_ctr32:
253	aese	v0.16b,v16.16b
254	aesmc	v0.16b,v0.16b
255	aese	v1.16b,v16.16b
256	aesmc	v1.16b,v1.16b
257	aese	v18.16b,v16.16b
258	aesmc	v18.16b,v18.16b
259	ld1	{v16.4s},[x7],#16
260	subs	w6,w6,#2
261	aese	v0.16b,v17.16b
262	aesmc	v0.16b,v0.16b
263	aese	v1.16b,v17.16b
264	aesmc	v1.16b,v1.16b
265	aese	v18.16b,v17.16b
266	aesmc	v18.16b,v18.16b
267	ld1	{v17.4s},[x7],#16
268	b.gt	Loop3x_ctr32
269
270	aese	v0.16b,v16.16b
271	aesmc	v4.16b,v0.16b
272	aese	v1.16b,v16.16b
273	aesmc	v5.16b,v1.16b
274	ld1	{v2.16b},[x0],#16
275	add	w9,w8,#1
276	aese	v18.16b,v16.16b
277	aesmc	v18.16b,v18.16b
278	ld1	{v3.16b},[x0],#16
279	rev	w9,w9
280	aese	v4.16b,v17.16b
281	aesmc	v4.16b,v4.16b
282	aese	v5.16b,v17.16b
283	aesmc	v5.16b,v5.16b
284	ld1	{v19.16b},[x0],#16
285	mov	x7,x3
286	aese	v18.16b,v17.16b
287	aesmc	v17.16b,v18.16b
288	aese	v4.16b,v20.16b
289	aesmc	v4.16b,v4.16b
290	aese	v5.16b,v20.16b
291	aesmc	v5.16b,v5.16b
292	eor	v2.16b,v2.16b,v7.16b
293	add	w10,w8,#2
294	aese	v17.16b,v20.16b
295	aesmc	v17.16b,v17.16b
296	eor	v3.16b,v3.16b,v7.16b
297	add	w8,w8,#3
298	aese	v4.16b,v21.16b
299	aesmc	v4.16b,v4.16b
300	aese	v5.16b,v21.16b
301	aesmc	v5.16b,v5.16b
302	 // Note the logic to update v0.16b, v1.16b, and v1.16b is written to work
303	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
304	 // 32-bit mode. See the comment above.
305	eor	v19.16b,v19.16b,v7.16b
306	mov	v6.s[3], w9
307	aese	v17.16b,v21.16b
308	aesmc	v17.16b,v17.16b
309	orr	v0.16b,v6.16b,v6.16b
310	rev	w10,w10
311	aese	v4.16b,v22.16b
312	aesmc	v4.16b,v4.16b
313	mov	v6.s[3], w10
314	rev	w12,w8
315	aese	v5.16b,v22.16b
316	aesmc	v5.16b,v5.16b
317	orr	v1.16b,v6.16b,v6.16b
318	mov	v6.s[3], w12
319	aese	v17.16b,v22.16b
320	aesmc	v17.16b,v17.16b
321	orr	v18.16b,v6.16b,v6.16b
322	subs	x2,x2,#3
323	aese	v4.16b,v23.16b
324	aese	v5.16b,v23.16b
325	aese	v17.16b,v23.16b
326
327	eor	v2.16b,v2.16b,v4.16b
328	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
329	st1	{v2.16b},[x1],#16
330	eor	v3.16b,v3.16b,v5.16b
331	mov	w6,w5
332	st1	{v3.16b},[x1],#16
333	eor	v19.16b,v19.16b,v17.16b
334	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
335	st1	{v19.16b},[x1],#16
336	b.hs	Loop3x_ctr32
337
338	adds	x2,x2,#3
339	b.eq	Lctr32_done
340	cmp	x2,#1
341	mov	x12,#16
342	csel	x12,xzr,x12,eq
343
344Lctr32_tail:
345	aese	v0.16b,v16.16b
346	aesmc	v0.16b,v0.16b
347	aese	v1.16b,v16.16b
348	aesmc	v1.16b,v1.16b
349	ld1	{v16.4s},[x7],#16
350	subs	w6,w6,#2
351	aese	v0.16b,v17.16b
352	aesmc	v0.16b,v0.16b
353	aese	v1.16b,v17.16b
354	aesmc	v1.16b,v1.16b
355	ld1	{v17.4s},[x7],#16
356	b.gt	Lctr32_tail
357
358	aese	v0.16b,v16.16b
359	aesmc	v0.16b,v0.16b
360	aese	v1.16b,v16.16b
361	aesmc	v1.16b,v1.16b
362	aese	v0.16b,v17.16b
363	aesmc	v0.16b,v0.16b
364	aese	v1.16b,v17.16b
365	aesmc	v1.16b,v1.16b
366	ld1	{v2.16b},[x0],x12
367	aese	v0.16b,v20.16b
368	aesmc	v0.16b,v0.16b
369	aese	v1.16b,v20.16b
370	aesmc	v1.16b,v1.16b
371	ld1	{v3.16b},[x0]
372	aese	v0.16b,v21.16b
373	aesmc	v0.16b,v0.16b
374	aese	v1.16b,v21.16b
375	aesmc	v1.16b,v1.16b
376	eor	v2.16b,v2.16b,v7.16b
377	aese	v0.16b,v22.16b
378	aesmc	v0.16b,v0.16b
379	aese	v1.16b,v22.16b
380	aesmc	v1.16b,v1.16b
381	eor	v3.16b,v3.16b,v7.16b
382	aese	v0.16b,v23.16b
383	aese	v1.16b,v23.16b
384
385	cmp	x2,#1
386	eor	v2.16b,v2.16b,v0.16b
387	eor	v3.16b,v3.16b,v1.16b
388	st1	{v2.16b},[x1],#16
389	b.eq	Lctr32_done
390	st1	{v3.16b},[x1]
391
392Lctr32_done:
393	ldr	x29,[sp],#16
394	ret
395
396#endif
397#endif  // !OPENSSL_NO_ASM
398