/*
 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
 * Licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include <common/asm.h>

/*
 * void bzero(void *p, size_t size)
 *
 *  x0 - p
 *  x1 - size
 */
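
/*
 * Overview (summarizing the comments below): buffers of at most 16
 * bytes, or systems where "dc zva" is disabled, are zeroed with plain
 * byte/word stores (the "normal" path). Larger buffers are first
 * aligned to a cache line, zeroed one line at a time with "dc zva",
 * and any tail is finished with a second pass through "normal".
 */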

BEGIN_FUNC(bzero)
	cbz	x1, ending

	/*
	 * x5 is the number of cache lines to zero - calculated later and
	 * set non-zero if the buffer is long enough to be zeroed by cache
	 * lines (and if that is allowed).
	 * We need to clear it before handling buffers smaller than 16
	 * bytes - otherwise x5 would never be calculated and would retain
	 * a random value.
	 * "normal" is used both for buffers <= 16 bytes and to align the
	 * buffer to a cache line when it is bigger than one; a non-zero x5
	 * after "normal" completes indicates that it was used to align the
	 * buffer, that zeroing by cache lines will now be performed, and
	 * that x5 is the number of cache lines to loop through.
	 */
	mov	x5, xzr

	/* Don't use cache-assisted zeroing for buffers of size <= 16 */
	cmp	x1, #0x10
	b.le	normal

	/*
	 * Load the size of the line that a "dc zva" call will zero.
	 * 0 means the instruction is not allowed.
	 */
	ldr	x7, =dczva_line_size
	ldr	x7, [x7]
	cbz	x7, normal
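	/*
	 * Note (an assumption, not stated in this file): dczva_line_size
	 * is presumably initialized at boot from DCZID_EL0, whose BS field
	 * encodes the block size zeroed by "dc zva" and whose DZP bit
	 * indicates whether the instruction is prohibited.
	 */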

	/*
	 * The buffer must be larger than a cache line to use cache zeroing
	 * (and cache line aligned, but that is checked after the jump)
	 */
	cmp	x1, x7
	b.lt	normal

	/*
	 * Calculate the number of bytes to the next cache-aligned address
	 * (x4) and the number of full cache lines (x5). x6 is the final
	 * address to zero.
	 */
	sub	x2, x7, #0x01
	mov	x3, -1
	eor	x3, x3, x2
	add	x4, x0, x2
	and	x4, x4, x3
	subs	x4, x4, x0
	b.eq	normal
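	/*
	 * Worked example with assumed values (not from the original): for
	 * x7 = 64 and x0 = 0x1009, x2 = 0x3f and x3 = ~0x3f, so
	 * x4 = ((0x1009 + 0x3f) & ~0x3f) - 0x1009 = 0x1040 - 0x1009 = 0x37
	 * bytes are needed to reach the next cache line boundary.
	 */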

	/* Calculate number of "lines" in buffer */
	sub	x5, x1, x4
	rbit	x2, x7
	clz	x2, x2
	lsr	x5, x5, x2
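	/*
	 * rbit + clz computes the number of trailing zeros of x7, i.e.
	 * log2 of the line size (a power of two); e.g. an assumed x7 = 64
	 * yields a shift of 6, so x5 = remaining bytes / 64 full lines.
	 */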

	/*
	 * If the number of cache lines is 0, we will not be able to zero
	 * by cache lines, so take the normal path.
	 */
	cbz	x5, normal
	/* x6 is final address to zero */
	add	x6, x0, x1

	/*
	 * We are here because x5 is non-zero, so "normal" will be used to
	 * align the buffer before cache zeroing. x4 holds the number of
	 * bytes needed for alignment.
	 */
	mov	x1, x4

	/* When jumping here: x0 holds the pointer, x1 holds the size */
normal:
	/*
	 * Get the buffer's offset from a 16-byte aligned address; 0 means
	 * the pointer is already aligned.
	 */
	ands	x2, x0, #0x0f
	b.eq	aligned_to_16
	/* Calculate the number of one-byte stores needed to reach an 8-byte aligned address. */
	ands	x2, x2, #0x07
	mov	x3, #0x08
	sub	x2, x3, x2
	/* x2 is the number of bytes missing for alignment, x1 is the buffer size */
	cmp	x1, x2
	csel	x2, x1, x2, le
	sub	x1, x1, x2
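	/*
	 * The csel clamps x2 to min(x1, x2) so a short buffer is never
	 * zeroed past its end; e.g. with assumed values size = 2 and
	 * 3 bytes to alignment, x2 becomes 2 and x1 becomes 0.
	 */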

	/*
	 * The byte-by-byte store will zero at least enough bytes to align
	 * the pointer and at most "size".
	 */
align:
	strb	wzr, [x0], #0x01
	subs	x2, x2, #0x01
	b.ne	align

	/* Now the pointer is aligned to 8 bytes */
	cmp	x1, #0x10
	b.lt	lead_out
	/*
	 * Check whether another 8-byte store is needed to align to a
	 * 16-byte address, and do it if so
	 */
	tbz	x0, #0x03, aligned_to_16
	str	xzr, [x0], #0x08
	sub	x1, x1, #0x08
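	/*
	 * The tbz above tests bit 3 of the address: when it is clear, the
	 * 8-byte aligned pointer is already 16-byte aligned and the extra
	 * store is skipped.
	 */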

	/* When jumping here: x0 is a 16-byte aligned address, x1 is size */
aligned_to_16:
	/* If size is less than 16 bytes, use lead_out to zero what remains */
	cmp	x1, #0x10
	b.lt	lead_out

	lsr	x2, x1, #0x04
zero_by_16:
	stp	xzr, xzr, [x0], #0x10
	subs	x2, x2, #0x01
	b.ne	zero_by_16
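	/*
	 * Above, x2 = x1 >> 4 is the number of 16-byte chunks; each
	 * "stp xzr, xzr" stores 16 zero bytes, so the loop zeroes every
	 * full chunk of the buffer.
	 */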

	/*
	 * Lead out requires addresses to be aligned to 8 bytes. It is used
	 * to zero buffers with sizes < 16 and whatever cannot be zeroed by
	 * the zero_by_16 loop.
	 */
	ands	x1, x1, #0x0f
	b.eq	lead_out_end
lead_out:
	tbz	x1, #0x03, lead_out_dword
	str	xzr, [x0], #0x08
lead_out_dword:
	tbz	x1, #0x02, lead_out_word
	str	wzr, [x0], #0x04
lead_out_word:
	tbz	x1, #0x01, lead_out_byte
	strh	wzr, [x0], #0x02
lead_out_byte:
	tbz	x1, #0x00, lead_out_end
	strb	wzr, [x0], #0x01
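	/*
	 * The chain above walks the low four bits of the remaining size:
	 * each tbz tests one bit (8, 4, 2, then 1 bytes) and performs the
	 * matching store only when that bit is set, so any remainder in
	 * [0, 15] is covered.
	 */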

lead_out_end:
	/*
	 * If x5 is non-zero, it means that "normal" has been used as a
	 * lead-in to align the buffer address to the cache line size
	 */
	cbz	x5, ending

	/*
	 * Here x5 holds the number of lines to zero; x6 is the final
	 * address of the buffer. x0 is a cache-line-aligned pointer. x7 is
	 * the cache line size in bytes
	 */
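	/*
	 * "dc zva" (Data Cache Zero by VA) zeroes an entire block of the
	 * size reported by DCZID_EL0 - one full cache line here - per
	 * iteration, which is why a single instruction per line suffices.
	 */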
cache_line_zero:
	dc	zva, x0
	add	x0, x0, x7
	subs	x5, x5, #0x01
	b.ne	cache_line_zero

	/* Need to zero remaining bytes? */
	subs	x1, x6, x0
	b.ne	normal
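	/*
	 * If the branch above is taken, x5 is now 0 (the loop decremented
	 * it to zero), so this second pass through "normal" zeroes the
	 * tail and then falls through lead_out_end to ending.
	 */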

ending:
	ret
END_FUNC(bzero)