• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * memset - fill memory with a constant byte
3 *
4 * Copyright (c) 2012-2020, Arm Limited.
5 * SPDX-License-Identifier: MIT
6 */
7
8/* Assumptions:
9 *
10 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
11 *
12 */
13
/* Register aliases used by the memset routine in this file:
 *   dstin   - destination pointer as received in x0; the routine never
 *             writes it, so x0 still holds it on return.
 *   val     - 64-bit view (x1) of the fill-value register.
 *   valw    - 32-bit view (w1) of the same register.
 *   count   - number of bytes to set (x2); reused as the loop counter
 *             in the long-fill paths.
 *   dst     - working store pointer (x3) for the long-fill paths.
 *   dstend  - dstin + count, i.e. one past the last byte to set (x4).
 *   zva_val - scratch (x5) for the DCZID_EL0 value read before DC ZVA
 *             is used.
 */
14#define dstin   x0
15#define val     x1
16#define valw    w1
17#define count   x2
18#define dst     x3
19#define dstend  x4
20#define zva_val x5
21
/*
 * memset: store the low byte of valw into each of the count bytes that
 * start at dstin.
 *
 * In:   x0 (dstin) = destination, w1 (valw) = fill value (only the low
 *       byte is used), x2 (count) = number of bytes.
 * Out:  x0 is never written, so the destination pointer is still in x0
 *       on return.
 * Uses: x1-x5, v0 and the condition flags as scratch.  The routine does
 *       not touch the stack and makes no calls.
 *
 * Dispatch by size:
 *   0..15   scalar stores of 8/4/2/1 bytes, one from each end of the
 *           buffer; the two stores may overlap.
 *   16..96  16-byte vector stores from both ends, again overlapping.
 *   > 96    .Lset_long: 64 bytes per loop iteration.  When the fill byte
 *           is zero and count >= 160, DC ZVA is used provided the block
 *           size check passes (or SKIP_ZVA_CHECK is defined).
 */
22.global memset
23.type memset,%function
24memset:
25
26	dup     v0.16B, valw		/* v0 = fill byte replicated in all 16 lanes */
27	add     dstend, dstin, count	/* dstend = one past the last byte to set */
28
29	cmp     count, 96
30	b.hi    .Lset_long		/* count > 96 */
31	cmp     count, 16
32	b.hs    .Lset_medium		/* 16 <= count <= 96 */
33	mov     val, v0.D[0]		/* val = low 64 bits of v0 (fill byte x 8) */
34
35	/* Set 0..15 bytes.  */
	/* count < 16 here, so single bits of count select the size class.
	   Each class stores once from dstin and once ending at dstend; the
	   two stores overlap whenever count is not exactly twice the width.  */
36	tbz     count, 3, 1f		/* bit 3 clear: count < 8 */
37	str     val, [dstin]		/* 8..15 bytes: first 8 ...  */
38	str     val, [dstend, -8]	/* ... and last 8 */
39	ret
40	nop				/* not reachable (follows ret, no label); presumably alignment padding */
411:      tbz     count, 2, 2f	/* bit 2 clear: count < 4 */
42	str     valw, [dstin]		/* 4..7 bytes: first 4 ...  */
43	str     valw, [dstend, -4]	/* ... and last 4 */
44	ret
452:      cbz     count, 3f		/* count == 0: nothing to store */
46	strb    valw, [dstin]		/* 1..3 bytes: first byte */
47	tbz     count, 1, 3f		/* bit 1 clear: count == 1, done */
48	strh    valw, [dstend, -2]	/* count is 2 or 3: last two bytes */
493:      ret
50
51	/* Set 16..96 bytes.  */
52.Lset_medium:
53	str     q0, [dstin]		/* first 16 bytes */
54	tbnz    count, 6, .Lset96	/* bit 6 set: 64 <= count <= 96 */
55	str     q0, [dstend, -16]	/* last 16 bytes */
56	tbz     count, 5, 1f		/* bit 5 clear: count < 32, done */
57	str     q0, [dstin, 16]		/* 32..63 bytes: second 16 from the start */
58	str     q0, [dstend, -32]	/* ... and second 16 from the end */
591:      ret
60
61	.p2align 4
62	/* Set 64..96 bytes.  Write 64 bytes from the start and
63	   32 bytes from the end.  */
64.Lset96:
65	str     q0, [dstin, 16]		/* bytes 16..31 (0..15 already stored) */
66	stp     q0, q0, [dstin, 32]	/* bytes 32..63 */
67	stp     q0, q0, [dstend, -32]	/* last 32 bytes */
68	ret
69
70	.p2align 4
	/* Set more than 96 bytes.  */
71.Lset_long:
72	and     valw, valw, 255		/* keep only the fill byte for the zero test below */
73	bic     dst, dstin, 15		/* dst = dstin rounded down to 16 bytes */
74	str     q0, [dstin]		/* first 16 bytes, at the unaligned start */
75	cmp     count, 160
76	ccmp    valw, 0, 0, hs		/* count >= 160: compare byte with 0; else force NE */
77	b.ne    .Lno_zva		/* count < 160 or fill byte != 0 */
78
	/* Low five bits of DCZID_EL0 must equal 4 for the DC ZVA path.
	   NOTE(review): per the Arm ARM, bits [3:0] encode the block size
	   and bit 4 flags DC ZVA as prohibited -- confirm against the
	   manual.  */
79#ifndef SKIP_ZVA_CHECK
80	mrs     zva_val, dczid_el0
81	and     zva_val, zva_val, 31
82	cmp     zva_val, 4              /* ZVA size is 64 bytes.  */
83	b.ne    .Lno_zva
84#endif
	/* Fill up to the next 64-byte boundary with ordinary stores so the
	   loop below only touches whole 64-byte-aligned blocks.  */
85	str     q0, [dst, 16]
86	stp     q0, q0, [dst, 32]	/* everything below dst + 64 is now set */
87	bic     dst, dst, 63		/* dst = rounded down to 64 bytes */
88	sub     count, dstend, dst      /* Count is now 64 too large.  */
89	sub     count, count, 128       /* Adjust count and bias for loop.  */
90
91	.p2align 4
	/* Each iteration advances dst by 64 and issues one DC ZVA there.
	   The loop stops once 64 or fewer bytes remain past that block;
	   those are covered by the tail stores.  */
92.Lzva_loop:
93	add     dst, dst, 64
94	dc      zva, dst
95	subs    count, count, 64
96	b.hi    .Lzva_loop
97	stp     q0, q0, [dstend, -64]	/* tail: last 64 bytes, may overlap the loop's output */
98	stp     q0, q0, [dstend, -32]
99	ret
100
	/* Generic long fill: 64 bytes of vector stores per iteration.  */
101.Lno_zva:
102	sub     count, dstend, dst      /* Count is 16 too large.  */
103	sub     dst, dst, 16            /* Dst is biased by -32.  */
104	sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
105.Lno_zva_loop:
106	stp     q0, q0, [dst, 32]	/* 32 bytes at dst + 32 */
107	stp     q0, q0, [dst, 64]!	/* 32 bytes at dst + 64, then dst += 64 (writeback) */
108	subs    count, count, 64
109	b.hi    .Lno_zva_loop
110	stp     q0, q0, [dstend, -64]	/* tail: last 64 bytes, may overlap the loop's output */
111	stp     q0, q0, [dstend, -32]
112	ret
113
114.size memset,.-memset
115
116