/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

#include "asmdefs.h"

/* Register roles.  */
#define dstin   x0      /* Start of the buffer; never written.  */
#define val     x1      /* Fill value (64-bit view).  */
#define valw    w1      /* Fill value (32-bit view).  */
#define count   x2      /* Byte count; reused as loop counter.  */
#define dst     x3      /* Aligned running store pointer.  */
#define dstend  x4      /* One past the last byte: dstin + count.  */
#define zva_val x5      /* Scratch for the DCZID_EL0 value.  */

/* __memset_aarch64

   In:       x0 (dstin) = start of the buffer
             w1 (valw)  = fill value; only the low byte is used
             x2 (count) = number of bytes to set
   Out:      x0 is never written, so it still holds dstin on return.
   Clobbers: x1-x5, v0 and the condition flags (depending on the path).

   Strategy: the fill byte is replicated into all 16 lanes of v0, then
   the size is dispatched to one of three paths:
      0..15 bytes   overlapping scalar stores from both ends,
     16..96 bytes   overlapping 16-byte vector stores from both ends,
       > 96 bytes   a 64-byte-per-iteration loop on a 16-byte aligned
                    pointer, or DC ZVA when zeroing at least 160 bytes.
   Every path writes the tail relative to dstend, so stores may overlap
   but never go outside [dstin, dstend).

   PTR_ARG and SIZE_ARG come from asmdefs.h.
   NOTE(review): presumably they normalise the pointer and size
   arguments for the target ABI - confirm against asmdefs.h.  */

ENTRY (__memset_aarch64)
        PTR_ARG (0)
        SIZE_ARG (2)

        dup     v0.16B, valw            /* v0 = fill byte in all 16 lanes.  */
        add     dstend, dstin, count

        cmp     count, 96
        b.hi    L(set_long)
        cmp     count, 16
        b.hs    L(set_medium)
        mov     val, v0.D[0]            /* val = fill byte replicated 8 times.  */

        /* Set 0..15 bytes.  */
        tbz     count, 3, 1f            /* Bit 3 clear: count < 8.  */
        /* 8..15 bytes: two 8-byte stores that may overlap.  */
        str     val, [dstin]
        str     val, [dstend, -8]
        ret
        .p2align 4
1:      tbz     count, 2, 2f            /* Bit 2 clear: count < 4.  */
        /* 4..7 bytes: two 4-byte stores that may overlap.  */
        str     valw, [dstin]
        str     valw, [dstend, -4]
        ret
2:      cbz     count, 3f               /* Nothing to do for count == 0.  */
        strb    valw, [dstin]
        tbz     count, 1, 3f            /* Bit 1 clear: count == 1, done.  */
        strh    valw, [dstend, -2]      /* 2..3 bytes: cover the tail.  */
3:      ret

        /* Set 16..96 bytes.  */
L(set_medium):
        str     q0, [dstin]
        tbnz    count, 6, L(set96)      /* Bit 6 set: count >= 64.  */
        str     q0, [dstend, -16]
        tbz     count, 5, 1f            /* Bit 5 clear: 16..31 bytes, done.  */
        /* 32..63 bytes: fill the middle from both ends.  */
        str     q0, [dstin, 16]
        str     q0, [dstend, -32]
1:      ret

        .p2align 4
        /* Set 64..96 bytes.  Write 64 bytes from the start and
           32 bytes from the end.  */
L(set96):
        str     q0, [dstin, 16]
        stp     q0, q0, [dstin, 32]
        stp     q0, q0, [dstend, -32]
        ret

        .p2align 4
L(set_long):
        and     valw, valw, 255         /* Keep only the fill byte.  */
        bic     dst, dstin, 15          /* dst = dstin rounded down to 16.  */
        str     q0, [dstin]             /* Covers everything below dst + 16.  */
        /* Use DC ZVA only when count >= 160 and the fill byte is zero.
           If count < 160 the CCMP sets NZCV to 0, so b.ne is taken.  */
        cmp     count, 160
        ccmp    valw, 0, 0, hs
        b.ne    L(no_zva)

#ifndef SKIP_ZVA_CHECK
        /* DCZID_EL0[3:0] is log2 of the block size in words and bit 4
           is set when DC ZVA is prohibited, so the low five bits must
           be exactly 4.  With SKIP_ZVA_CHECK defined this test is
           compiled out and a usable 64-byte ZVA is assumed.  */
        mrs     zva_val, dczid_el0
        and     zva_val, zva_val, 31
        cmp     zva_val, 4              /* ZVA size is 64 bytes.  */
        b.ne    L(no_zva)
#endif
        /* Fill up to dst + 64, which reaches the next 64-byte boundary.  */
        str     q0, [dst, 16]
        stp     q0, q0, [dst, 32]
        bic     dst, dst, 63            /* dst = rounded down to 64.  */
        sub     count, dstend, dst      /* Count is now 64 too large.  */
        sub     count, count, 128       /* Adjust count and bias for loop.  */

        .p2align 4
        /* Zero one 64-byte block per iteration; the last 64 bytes are
           left to the stores after the loop.  */
L(zva_loop):
        add     dst, dst, 64
        dc      zva, dst
        subs    count, count, 64
        b.hi    L(zva_loop)
        /* Final 64 bytes, addressed from the end (q0 is zero here).  */
        stp     q0, q0, [dstend, -64]
        stp     q0, q0, [dstend, -32]
        ret

L(no_zva):
        sub     count, dstend, dst      /* Count is 16 too large.  */
        sub     dst, dst, 16            /* Dst is biased by -32.  */
        sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
        /* Store 64 bytes per iteration; the pre-indexed second STP
           advances dst by 64.  The last 64 bytes are left to the stores
           after the loop.  */
L(no_zva_loop):
        stp     q0, q0, [dst, 32]
        stp     q0, q0, [dst, 64]!
        subs    count, count, 64
        b.hi    L(no_zva_loop)
        /* Final 64 bytes, addressed from the end.  */
        stp     q0, q0, [dstend, -64]
        stp     q0, q0, [dstend, -32]
        ret

END (__memset_aarch64)
