/* memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

/*----------------------------------------------------------------------
 * void *memset (void *dstin, int val, size_t count)
 *
 * ABI:      AAPCS64
 * In:       x0 = dstin  destination pointer
 *           w1 = val    fill value (only the low byte is used)
 *           x2 = count  number of bytes to fill
 * Out:      x0 = dstin  (x0 is never written below, so the original
 *                        pointer is returned as the C contract requires)
 * Clobbers: x1-x5, v0, NZCV flags
 *
 * Strategy: broadcast the fill byte into v0, then dispatch on size.
 * Small and medium sizes are covered by overlapping stores issued from
 * both ends of the buffer, so no loop is needed below 97 bytes.  Large
 * zero-fills use DC ZVA (zero a whole cache line) when the fill byte
 * is 0 and the CPU reports a 64-byte ZVA block size.
 *----------------------------------------------------------------------*/

#define dstin	x0
#define val	x1
#define valw	w1
#define count	x2
#define dst	x3
#define dstend	x4
#define zva_val	x5

.global memset
.type memset,%function
memset:
	dup	v0.16B, valw		/* Replicate fill byte into all 16 lanes.  */
	add	dstend, dstin, count	/* dstend = one past the last byte.  */

	cmp	count, 96
	b.hi	.Lset_long		/* > 96 bytes: long path.  */
	cmp	count, 16
	b.hs	.Lset_medium		/* 16..96 bytes.  */
	mov	val, v0.D[0]		/* val = fill byte in all 8 byte lanes.  */

	/* Set 0..15 bytes.  Test one bit of count per step and issue a
	   pair of possibly-overlapping stores from each end, so every
	   size 0..15 is covered without a loop.  */
	tbz	count, 3, 1f
	str	val, [dstin]
	str	val, [dstend, -8]
	ret
	nop				/* Padding to keep 1f nicely placed.  */
1:	tbz	count, 2, 2f
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes.  16 bytes from each end always; if bit 5 is
	   set (count 32..63) a further 16 from each end; counts 64..96
	   branch to .Lset96.  Stores may overlap — that is harmless.  */
.Lset_medium:
	str	q0, [dstin]
	tbnz	count, 6, .Lset96	/* Bit 6 set => count is 64..96.  */
	str	q0, [dstend, -16]
	tbz	count, 5, 1f
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
.Lset96:
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
.Lset_long:
	and	valw, valw, 255		/* Isolate the fill byte to test it for zero.  */
	bic	dst, dstin, 15		/* dst = dstin rounded down to 16 bytes.  */
	str	q0, [dstin]		/* Head store covers the unaligned prefix.  */
	cmp	count, 160
	ccmp	valw, 0, 0, hs		/* If count >= 160 test valw == 0, else force NE.  */
	b.ne	.Lno_zva		/* Non-zero fill or < 160 bytes: store loop.  */

#ifndef SKIP_ZVA_CHECK
	/* Read DCZID_EL0: low 4 bits = ZVA block size, bit 4 = ZVA
	   prohibited.  Only value 4 (64-byte blocks, ZVA enabled) may
	   take the DC ZVA path.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	.Lno_zva
#endif
	str	q0, [dst, 16]		/* Fill up to the next 64-byte boundary...  */
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63		/* ...then round dst down to 64 bytes.  */
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 128	/* Adjust count and bias for loop.  */

	.p2align 4
.Lzva_loop:
	add	dst, dst, 64
	dc	zva, dst		/* Zero one whole 64-byte line.  */
	subs	count, count, 64
	b.hi	.Lzva_loop
	stp	q0, q0, [dstend, -64]	/* Tail stores; may overlap the loop's.  */
	stp	q0, q0, [dstend, -32]
	ret

.Lno_zva:
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
.Lno_zva_loop:
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!	/* Pre-index writeback advances dst by 64.  */
	subs	count, count, 64
	b.hi	.Lno_zva_loop
	stp	q0, q0, [dstend, -64]	/* Tail stores; may overlap the loop's.  */
	stp	q0, q0, [dstend, -32]
	ret

.size memset,.-memset