; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -memcpyopt -S %s -enable-memcpyopt-memoryssa=0 | FileCheck %s
; RUN: opt -memcpyopt -S %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s

; Test memset -> memcpy forwarding when the memcpy is larger than the memset,
; but the trailing bytes are known to be undef: the memcpy can then be replaced
; by a memset of the destination using the original memset's value and size.
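
; %T is 16 bytes: an i64 at offset 0 and two i32 fields at offsets 8 and 12.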
%T = type { i64, i32, i32 }

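; The memset covers the first 12 bytes of the alloca and the remaining 4 bytes
; are undef, so the 16-byte memcpy is replaced by a 12-byte memset of %result.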
define void @test_alloca(i8* %result) {
; CHECK-LABEL: @test_alloca(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

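; Same as @test_alloca, but with lifetime intrinsics covering the full alloca.
; The lifetime size (16) covers the copied range, so the transform still applies.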
define void @test_alloca_with_lifetimes(i8* %result) {
; CHECK-LABEL: @test_alloca_with_lifetimes(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %b)
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %b)
  ret void
}

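; The same transform applies to malloc'ed memory with lifetime markers.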
define void @test_malloc_with_lifetimes(i8* %result) {
; CHECK-LABEL: @test_malloc_with_lifetimes(
; CHECK-NEXT:    [[A:%.*]] = call i8* @malloc(i64 16)
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
; CHECK-NEXT:    call void @free(i8* [[A]])
; CHECK-NEXT:    ret void
;
  %a = call i8* @malloc(i64 16)
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %a)
  call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %a)
  call void @free(i8* %a)
  ret void
}

; The memcpy size is larger than the lifetime size, so don't optimize.
define void @test_copy_larger_than_lifetime_size(i8* %result) {
; CHECK-LABEL: @test_copy_larger_than_lifetime_size(
; CHECK-NEXT:    [[A:%.*]] = call i8* @malloc(i64 16)
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A]])
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 12, i8* [[A]])
; CHECK-NEXT:    call void @free(i8* [[A]])
; CHECK-NEXT:    ret void
;
  %a = call i8* @malloc(i64 16)
  call void @llvm.lifetime.start.p0i8(i64 12, i8* %a)
  call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 12, i8* %a)
  call void @free(i8* %a)
  ret void
}

; The trailing bytes are not known to be undef, so we can't ignore them.
define void @test_not_undef_memory(i8* %result, i8* %input) {
; CHECK-LABEL: @test_not_undef_memory(
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[INPUT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[INPUT]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.p0i8.i64(i8* align 8 %input, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %input, i64 16, i1 false)
  ret void
}

; The memset is volatile, the memcpy is not. This can still be optimized.
define void @test_volatile_memset(i8* %result) {
; CHECK-LABEL: @test_volatile_memset(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 true)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

; The memcpy is volatile, the memset is not. This cannot be optimized.
define void @test_volatile_memcpy(i8* %result) {
; CHECK-LABEL: @test_volatile_memcpy(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 true)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 true)
  ret void
}

; A write between the memset and the memcpy means we can't optimize.
define void @test_write_between(i8* %result) {
; CHECK-LABEL: @test_write_between(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    store i8 -1, i8* [[B]], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
  store i8 -1, i8* %b
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

; A write prior to the memset, which lies within the memset region.
; We could optimize this, but currently don't, because the memory location
; used for the check is imprecise.
define void @test_write_before_memset_in_memset_region(i8* %result) {
; CHECK-LABEL: @test_write_before_memset_in_memset_region(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    store i8 -1, i8* [[B]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
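  ; The store at offset 0 is entirely covered by the 8-byte memset below.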
  store i8 -1, i8* %b
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 8, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

; A write prior to the memset, which lies within the memcpy (but not the
; memset) region. This cannot be optimized.
define void @test_write_before_memset_in_memcpy_region(i8* %result) {
; CHECK-LABEL: @test_write_before_memset_in_memcpy_region(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 2
; CHECK-NEXT:    store i32 -1, i32* [[C]], align 4
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  %c = getelementptr inbounds %T, %T* %a, i64 0, i32 2
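  ; Store to the i32 at offset 12: inside the 16-byte memcpy region, but past
  ; the end of the 8-byte memset region.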
  store i32 -1, i32* %c
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 8, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

; A write prior to the memset, which lies within both the memset and the
; memcpy regions. This cannot be optimized.
define void @test_write_before_memset_in_both_regions(i8* %result) {
; CHECK-LABEL: @test_write_before_memset_in_both_regions(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 1
; CHECK-NEXT:    store i32 -1, i32* [[C]], align 4
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 10, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  %c = getelementptr inbounds %T, %T* %a, i64 0, i32 1
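  ; Store to the i32 at offset 8: bytes 8-9 fall inside the 10-byte memset
  ; region, bytes 10-11 past it but still inside the memcpy region.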
  store i32 -1, i32* %c
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 10, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

declare i8* @malloc(i64)
declare void @free(i8*)

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)