; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s

define i8* @test_memcpy1_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 1)
  ret i8* %P
}

define i8* @test_memcpy2_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy2_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 2)
  ret i8* %P
}

define i8* @test_memcpy4_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy4_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 4)
  ret i8* %P
}

define i8* @test_memcpy8(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 1024, i32 8)
  ret i8* %P
}

define i8* @test_memcpy16_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy16_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 1024, i32 16)
  ret i8* %P
}

define void @test_memcpy_args(i8** %Storage) {
; CHECK-LABEL: test_memcpy_args:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rsi
; CHECK-NEXT:    movq %rax, %rdi
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_4
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %Dst = load i8*, i8** %Storage
  %Src.addr = getelementptr i8*, i8** %Storage, i64 1
  %Src = load i8*, i8** %Src.addr

  ; 1st arg (%rdi)
  ; 2nd arg (%rsi)
  ; 3rd arg (%edx) -- length
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 1024, i32 4)
  ret void
}

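; For reference: the __llvm_memcpy_element_unordered_atomic_<N> libcalls
; checked above take (dest, src, length-in-bytes), which is what the
; %rdi/%rsi/%edx moves verify. An assumed IR-level declaration, inferred
; from those register moves rather than copied from a header, would be:
;
;   declare void @__llvm_memcpy_element_unordered_atomic_4(i8* %dst, i8* %src, i64 %len)
;
; with the element size encoded in the callee name.
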
define i8* @test_memmove1_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove1_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 1)
  ret i8* %P
}

define i8* @test_memmove2_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove2_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 2)
  ret i8* %P
}

define i8* @test_memmove4_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove4_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 4)
  ret i8* %P
}

define i8* @test_memmove8_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove8_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 1024, i32 8)
  ret i8* %P
}

define i8* @test_memmove16_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove16_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 1024, i32 16)
  ret i8* %P
}

define void @test_memmove_args(i8** %Storage) {
; CHECK-LABEL: test_memmove_args:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rsi
; CHECK-NEXT:    movq %rax, %rdi
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_4
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %Dst = load i8*, i8** %Storage
  %Src.addr = getelementptr i8*, i8** %Storage, i64 1
  %Src = load i8*, i8** %Src.addr

  ; 1st arg (%rdi)
  ; 2nd arg (%rsi)
  ; 3rd arg (%edx) -- length
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 1024, i32 4)
  ret void
}

define i8* @test_memset1_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset1_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 1024, i32 1)
  ret i8* %P
}

define i8* @test_memset2_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset2_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %P, i8 %V, i32 1024, i32 2)
  ret i8* %P
}

define i8* @test_memset4_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1024, i32 4)
  ret i8* %P
}

define i8* @test_memset8_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset8_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 1024, i32 8)
  ret i8* %P
}

define i8* @test_memset16_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset16_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; 3rd arg (%edx) -- length
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 1024, i32 16)
  ret i8* %P
}

define void @test_memset_args(i8** %Storage, i8* %V) {
; CHECK-LABEL: test_memset_args:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq (%rdi), %rdi
; CHECK-NEXT:    movzbl (%rsi), %esi
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %Dst = load i8*, i8** %Storage
  %Val = load i8, i8* %V

  ; 1st arg (%rdi)
  ; 2nd arg (%rsi)
  ; 3rd arg (%edx) -- length
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %Dst, i8 %Val, i32 1024, i32 4)
  ret void
}

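; The memset libcall has the same shape as the memcpy one except that the
; middle argument is the fill value (zero-extended into %esi above) rather
; than a source pointer. Assumed declaration, again inferred from the
; checked register moves rather than taken from a header:
;
;   declare void @__llvm_memset_element_unordered_atomic_4(i8* %dst, i8 %value, i64 %len)
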
;; The next batch of tests covers cases where we could profitably lower to
;; atomic loads and stores directly, just as we already do with ordinary
;; loads and stores for the non-element.unordered.atomic variants. A sketch
;; of what such a lowering could look like follows.

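;; Hypothetical expansion (an assumption for illustration only, not output
;; that llc currently produces or that this file checks): with an element
;; size of 8 and a total length of 16, the intrinsic could become two
;; unordered atomic load/store pairs instead of a libcall:
;;
;;   %Src64 = bitcast i8* %Q to i64*
;;   %Dst64 = bitcast i8* %P to i64*
;;   %v0 = load atomic i64, i64* %Src64 unordered, align 8
;;   store atomic i64 %v0, i64* %Dst64 unordered, align 8
;;   %s1 = getelementptr i64, i64* %Src64, i64 1
;;   %d1 = getelementptr i64, i64* %Dst64, i64 1
;;   %v1 = load atomic i64, i64* %s1 unordered, align 8
;;   store atomic i64 %v1, i64* %d1 unordered, align 8
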
define i8* @test_memcpy1_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 1 %Q, i32 64, i32 1)
  ret i8* %P
}

; Ensure that higher alignment (the align 4/8/16 variants below) generates
; vector load/stores even with a small element size
define i8* @test_memcpy1_64_align4(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64_align4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memcpy1_64_align8(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64_align8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 1)
  ret i8* %P
}


define i8* @test_memcpy1_64_align16(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 1)
  ret i8* %P
}


; Make sure that different source & dest alignments are handled correctly.
define i8* @test_memcpy1_64_diff_aligns(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64_diff_aligns:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 16 %Q, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memcpy2_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy2_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %P, i8* align 2 %Q, i32 64, i32 2)
  ret i8* %P
}

define i8* @test_memcpy4_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy4_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memcpy8_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy8_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 8)
  ret i8* %P
}

define i8* @test_memcpy16_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy16_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 16)
  ret i8* %P
}

; ================ memmove ================

define i8* @test_memmove1_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove1_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 1 %Q, i32 64, i32 1)
  ret i8* %P
}

; Ensure align 16 generates vector load/stores even with small element size
define i8* @test_memmove1_64_align16(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove1_64_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 1)
  ret i8* %P
}

; Make sure that different source & dest alignments are handled correctly.
define i8* @test_memmove1_64_diff_aligns(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove1_64_diff_aligns:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 16 %Q, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memmove2_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove2_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %P, i8* align 2 %Q, i32 64, i32 2)
  ret i8* %P
}

define i8* @test_memmove4_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove4_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memmove8_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove8_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 8)
  ret i8* %P
}

define i8* @test_memmove16_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove16_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 16)
  ret i8* %P
}

; ================ memset ================

define i8* @test_memset1_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset1_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memset1_64_align16(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset1_64_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memset2_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset2_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %P, i8 %V, i32 64, i32 2)
  ret i8* %P
}

;; Use the memset4 case to explore alignment and sizing requirements in the
;; lowering; a sketch of one possible expansion follows.

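;; Hypothetical expansion for the 4-byte-element, 8-byte-length case
;; (an assumption for illustration only; the tests below check for the
;; libcall): splat the i8 fill value across an i32 and emit one unordered
;; atomic store per element:
;;
;;   %ext = zext i8 %V to i32
;;   %splat = mul i32 %ext, 16843009    ; 16843009 = 0x01010101
;;   %Dst32 = bitcast i8* %P to i32*
;;   store atomic i32 %splat, i32* %Dst32 unordered, align 4
;;   %d1 = getelementptr i32, i32* %Dst32, i64 1
;;   store atomic i32 %splat, i32* %d1 unordered, align 4
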
define i8* @test_memset4_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memset4_64_align8(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_64_align8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memset4_64_align16(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_64_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memset4_64_align64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_64_align64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 64 %P, i8 %V, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memset4_4(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $4, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 4, i32 4)
  ret i8* %P
}

define i8* @test_memset4_8(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $8, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 8, i32 4)
  ret i8* %P
}

define i8* @test_memset4_8_align8(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_8_align8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $8, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 8, i32 4)
  ret i8* %P
}

define i8* @test_memset4_12(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $12, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 12, i32 4)
  ret i8* %P
}

define i8* @test_memset4_16(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $16, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 16, i32 4)
  ret i8* %P
}

define i8* @test_memset4_16_align16(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_16_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $16, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 16, i32 4)
  ret i8* %P
}

define i8* @test_memset4_60(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_60:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $60, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 60, i32 4)
  ret i8* %P
}

define i8* @test_memset8_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset8_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 64, i32 8)
  ret i8* %P
}

define i8* @test_memset16_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset16_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 64, i32 16)
  ret i8* %P
}

define i8* @test_memset16_64_zero(i8* %P) {
; CHECK-LABEL: test_memset16_64_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 0, i32 64, i32 16)
  ret i8* %P
}


declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind