/*
 * Copyright (c) 2023 Institute of Parallel And Distributed Systems (IPADS), Shanghai Jiao Tong University (SJTU)
 * Licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include <common/asm.h>

/*
 * void bzero(void *p, size_t size)
 *
 * x0 - p
 * x1 - size
 */

BEGIN_FUNC(bzero)
        cbz     x1, ending

        /*
         * x5 is the number of cache lines to zero - it is calculated later
         * and becomes non-zero if the buffer is long enough to be zeroed by
         * cache lines (and if doing so is allowed).
         * We need to zero it before proceeding with buffers smaller than
         * 16 bytes - otherwise x5 would not be calculated and would retain
         * a random value.
         * "normal" is used for buffers <= 16 bytes and to align the buffer
         * to a cache line for buffers bigger than a cache line; a non-zero
         * x5 after "normal" has completed indicates that it was used to
         * align the buffer to a cache line and zeroing by cache lines will
         * now be performed, with x5 holding the number of cache lines to
         * loop through.
         */
        mov     x5, xzr

        /* Do not use cache-assisted zeroing for buffers with size <= 16 */
        cmp     x1, #0x10
        b.le    normal

        /*
         * Load the size of the line that will be cleaned by the dc zva call.
         * 0 means the instruction is not allowed.
         */
        ldr     x7, =dczva_line_size
        ldr     x7, [x7]
        cbz     x7, normal

        /*
         * The buffer must be larger than a cache line to use cache zeroing
         * (and cache line aligned, but this is checked after the jump).
         */
        cmp     x1, x7
        b.lt    normal

        /*
         * Calculate the number of bytes to the cache-aligned address (x4)
         * and the number of full cache lines (x5). x6 is the final address
         * to zero.
         */
        sub     x2, x7, #0x01
        mov     x3, -1
        eor     x3, x3, x2
        add     x4, x0, x2
        and     x4, x4, x3
        subs    x4, x4, x0
        b.eq    normal

        /*
         * Calculate the number of "lines" in the buffer. rbit + clz yields
         * log2 of the line size (a power of two), so the lsr divides by it.
         */
        sub     x5, x1, x4
        rbit    x2, x7
        clz     x2, x2
        lsr     x5, x5, x2

        /*
         * If the number of cache lines is 0, we will not be able to zero
         * by cache lines, so go the normal way.
         */
        cbz     x5, normal
        /* x6 is the final address to zero */
        add     x6, x0, x1

        /*
         * We are here because x5 is non-zero, so normal will be used to
         * align the buffer before cache zeroing. x4 holds the number of
         * bytes needed for alignment.
         */
        mov     x1, x4

/* When jumping here: x0 holds the pointer, x1 holds the size */
normal:
        /*
         * Get the buffer offset from a 16-byte aligned address; 0 means the
         * pointer is aligned.
         */
        ands    x2, x0, #0x0f
        b.eq    aligned_to_16
        /* Calculate how many one-byte stores reach an 8-byte aligned address. */
        ands    x2, x2, #0x07
        mov     x3, #0x08
        sub     x2, x3, x2
        /* x2 is the number of bytes missing for alignment, x1 is the buffer size */
        cmp     x1, x2
        csel    x2, x1, x2, le
        sub     x1, x1, x2
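
        /*
         * Worked example of the arithmetic above: if p ends in ...0x3, the
         * first ands leaves x2 = 3, so x2 becomes 8 - 3 = 5 bytes to the
         * next 8-byte boundary; with size = 100 the csel keeps 5 and x1
         * becomes 95. If size were only 3, the csel would clamp x2 to 3 so
         * the byte loop below never writes past the end of the buffer.
         */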

        /*
         * The byte-by-byte store loop writes at least enough bytes to align
         * the pointer and at most "size" bytes.
         */
align:
        strb    wzr, [x0], #0x01
        subs    x2, x2, #0x01
        b.ne    align

        /* Now the pointer is aligned to 8 bytes */
        cmp     x1, #0x10
        b.lt    lead_out
        /*
         * Check whether another 8-byte store is needed to align to a
         * 16-byte address, and do it.
         */
        tbz     x0, #0x03, aligned_to_16
        str     xzr, [x0], #0x08
        sub     x1, x1, #0x08

/* When jumping here: x0 is a 16-byte aligned address, x1 is the size */
aligned_to_16:
        /* If the size is less than 16 bytes, use lead_out to zero what remains */
        cmp     x1, #0x10
        b.lt    lead_out

        lsr     x2, x1, #0x04
zero_by_16:
        stp     xzr, xzr, [x0], #0x10
        subs    x2, x2, #0x01
        b.ne    zero_by_16

        /*
         * Lead out requires addresses to be aligned to 8 bytes. It is used
         * to zero buffers with sizes < 16 and whatever cannot be zeroed by
         * the zero_by_16 loop.
         */
        ands    x1, x1, #0x0f
        b.eq    lead_out_end
lead_out:
        tbz     x1, #0x03, lead_out_dword
        str     xzr, [x0], #0x08
lead_out_dword:
        tbz     x1, #0x02, lead_out_word
        str     wzr, [x0], #0x04
lead_out_word:
        tbz     x1, #0x01, lead_out_byte
        strh    wzr, [x0], #0x02
lead_out_byte:
        tbz     x1, #0x00, lead_out_end
        strb    wzr, [x0], #0x01

lead_out_end:
        /*
         * If x5 is non-zero, normal has been used as a lead-in to align the
         * buffer address to the cache line size.
         */
        cbz     x5, ending

        /*
         * Here x5 holds the number of lines to zero; x6 is the final
         * address of the buffer; x0 is the cache line aligned pointer;
         * x7 is the cache line size in bytes.
         */
cache_line_zero:
        dc      zva, x0
        add     x0, x0, x7
        subs    x5, x5, #0x01
        b.ne    cache_line_zero

        /* Any remaining bytes to zero? */
        subs    x1, x6, x0
        b.ne    normal

ending:
        ret
END_FUNC(bzero)
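
/*
 * For reference only: a C-level sketch of the generic (non-dc-zva) path
 * used above - align with byte stores, then 16 bytes per iteration, then
 * the tail. The cache-assisted path replaces the middle phase with one
 * dc zva per full line once the pointer is aligned to dczva_line_size.
 * bzero_ref is a hypothetical name; this sketch is not part of the build,
 * and it simplifies phase 1 to byte stores all the way to 16-byte
 * alignment, whereas the assembly switches to one xzr store after
 * reaching 8-byte alignment.
 *
 *   #include <stddef.h>
 *   #include <stdint.h>
 *
 *   static void bzero_ref(void *p, size_t size)
 *   {
 *           unsigned char *d = p;
 *
 *           // Phase 1: byte stores until d is 16-byte aligned (or size runs out).
 *           while (size > 0 && ((uintptr_t)d & 0xf) != 0) {
 *                   *d++ = 0;
 *                   size--;
 *           }
 *           // Phase 2: 16 bytes per iteration (the stp xzr, xzr loop above).
 *           while (size >= 16) {
 *                   for (int i = 0; i < 16; i++)
 *                           d[i] = 0;
 *                   d += 16;
 *                   size -= 16;
 *           }
 *           // Phase 3: tail of fewer than 16 bytes.
 *           while (size > 0) {
 *                   *d++ = 0;
 *                   size--;
 *           }
 *   }
 */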