/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex strings work originally authored
 * by Linaro. The original code can be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

/*
 * Move a buffer from src to dst (alignment handled by the hardware).
 * If dst is below src or the buffers do not overlap, tail-call __memcpy;
 * otherwise copy in reverse order, starting from the ends of the buffers.
 *
 * Parameters:
 *	x0 - dest
 *	x1 - src
 *	x2 - n
 * Returns:
 *	x0 - dest
 */
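
/*
 * Illustrative C-level sketch of the dispatch above (comment only, not
 * part of the build; pointer arithmetic simplified):
 *
 *	void *memmove(void *dest, const void *src, size_t n)
 *	{
 *		if (dest < src || (char *)dest >= (char *)src + n)
 *			return __memcpy(dest, src, n);	// forward copy safe
 *		// dest overlaps src from above: copy backwards (below)
 *	}
 */
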
dstin	.req	x0
src	.req	x1
count	.req	x2
tmp1	.req	x3
tmp1w	.req	w3
tmp2	.req	x4
tmp2w	.req	w4
tmp3	.req	x5
tmp3w	.req	w5
dst	.req	x6

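/* Four pairs of data registers; each ldp/stp moves 16 bytes at a time. */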
A_l	.req	x7
A_h	.req	x8
B_l	.req	x9
B_h	.req	x10
C_l	.req	x11
C_h	.req	x12
D_l	.req	x13
D_h	.req	x14

SYM_FUNC_START_ALIAS(__memmove)
SYM_FUNC_START_WEAK_PI(memmove)
	cmp	dstin, src
	b.lo	__memcpy
	add	tmp1, src, count
	cmp	dstin, tmp1
	b.hs	__memcpy		/* No overlap.  */

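	/*
	 * dst lies above src within an overlapping region: copy backwards.
	 * Point src and dst one past the end of their buffers so that the
	 * pre-decrement addressing below walks down towards the start.
	 */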
	add	dst, dstin, count
	add	src, src, count
	cmp	count, #16
	b.lo	.Ltail15  /* small copy: unaligned accesses are acceptable */

	ands	tmp2, src, #15     /* Bytes to reach alignment.  */
	b.eq	.LSrcAligned
	sub	count, count, tmp2
	/*
	* Copy the misaligned tail bytes first so that src becomes aligned.
	* The cost of these extra instructions is acceptable, and it ensures
	* that the subsequent accesses use aligned addresses.
	*/
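	/* Copy 1, 2, 4, then 8 bytes from the end, as directed by bits 0-3 of tmp2. */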
	tbz	tmp2, #0, 1f
	ldrb	tmp1w, [src, #-1]!
	strb	tmp1w, [dst, #-1]!
1:
	tbz	tmp2, #1, 2f
	ldrh	tmp1w, [src, #-2]!
	strh	tmp1w, [dst, #-2]!
2:
	tbz	tmp2, #2, 3f
	ldr	tmp1w, [src, #-4]!
	str	tmp1w, [dst, #-4]!
3:
	tbz	tmp2, #3, .LSrcAligned
	ldr	tmp1, [src, #-8]!
	str	tmp1, [dst, #-8]!

.LSrcAligned:
	cmp	count, #64
	b.ge	.Lcpy_over64

	/*
	* Deal with small copies quickly by dropping straight into the
	* exit block.
	*/
.Ltail63:
	/*
	* Copy up to 48 bytes of data. At this point we only need the
	* bottom 6 bits of count to be accurate.
	*/
	ands	tmp1, count, #0x30
	b.eq	.Ltail15
	cmp	tmp1w, #0x20
	b.eq	1f
	b.lt	2f
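	/*
	 * Fall through to copy 48 bytes, or enter at 1: or 2: below to
	 * copy only 32 or 16 bytes.
	 */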
	ldp	A_l, A_h, [src, #-16]!
	stp	A_l, A_h, [dst, #-16]!
1:
	ldp	A_l, A_h, [src, #-16]!
	stp	A_l, A_h, [dst, #-16]!
2:
	ldp	A_l, A_h, [src, #-16]!
	stp	A_l, A_h, [dst, #-16]!

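	/* Copy the remaining 0-15 bytes: 8, 4, 2, then 1, driven by bits 3-0 of count. */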
.Ltail15:
	tbz	count, #3, 1f
	ldr	tmp1, [src, #-8]!
	str	tmp1, [dst, #-8]!
1:
	tbz	count, #2, 2f
	ldr	tmp1w, [src, #-4]!
	str	tmp1w, [dst, #-4]!
2:
	tbz	count, #1, 3f
	ldrh	tmp1w, [src, #-2]!
	strh	tmp1w, [dst, #-2]!
3:
	tbz	count, #0, .Lexitfunc
	ldrb	tmp1w, [src, #-1]
	strb	tmp1w, [dst, #-1]

.Lexitfunc:
	ret

.Lcpy_over64:
	subs	count, count, #128
	b.ge	.Lcpy_body_large
	/*
	* Less than 128 bytes to copy, so handle 64 bytes here and then jump
	* to the tail.
	*/
	ldp	A_l, A_h, [src, #-16]
	stp	A_l, A_h, [dst, #-16]
	ldp	B_l, B_h, [src, #-32]
	ldp	C_l, C_h, [src, #-48]
	stp	B_l, B_h, [dst, #-32]
	stp	C_l, C_h, [dst, #-48]
	ldp	D_l, D_h, [src, #-64]!
	stp	D_l, D_h, [dst, #-64]!

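	/* The subtraction of 128 above left the low 6 bits of count intact,
	 * so they still hold the 0-63 byte remainder needed by .Ltail63. */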
	tst	count, #0x3f
	b.ne	.Ltail63
	ret

	/*
	* Critical loop. Start at a new cache line boundary. Assuming
	* 64 bytes per line this ensures the entire loop is in one line.
	*/
	.p2align	L1_CACHE_SHIFT
.Lcpy_body_large:
	/*
	* Pre-load 64 bytes of data. The 128 subtracted from count above
	* covers this block plus the final 64 bytes stored after the loop.
	*/
	ldp	A_l, A_h, [src, #-16]
	ldp	B_l, B_h, [src, #-32]
	ldp	C_l, C_h, [src, #-48]
	ldp	D_l, D_h, [src, #-64]!
1:
	/*
	* Interleave the load of the next 64-byte block with the store of
	* the previously loaded 64 bytes of data.
	*/
	stp	A_l, A_h, [dst, #-16]
	ldp	A_l, A_h, [src, #-16]
	stp	B_l, B_h, [dst, #-32]
	ldp	B_l, B_h, [src, #-32]
	stp	C_l, C_h, [dst, #-48]
	ldp	C_l, C_h, [src, #-48]
	stp	D_l, D_h, [dst, #-64]!
	ldp	D_l, D_h, [src, #-64]!
	subs	count, count, #64
	b.ge	1b
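	/* Drain the pipeline: store the 64 bytes loaded by the final iteration. */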
	stp	A_l, A_h, [dst, #-16]
	stp	B_l, B_h, [dst, #-32]
	stp	C_l, C_h, [dst, #-48]
	stp	D_l, D_h, [dst, #-64]!

	tst	count, #0x3f
	b.ne	.Ltail63
	ret
SYM_FUNC_END_PI(memmove)
EXPORT_SYMBOL(memmove)
SYM_FUNC_END_ALIAS(__memmove)
EXPORT_SYMBOL(__memmove)