; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -memcpyopt -S %s -enable-memcpyopt-memoryssa=0 | FileCheck %s
; RUN: opt -memcpyopt -S %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s

; memset -> memcpy forwarding, if memcpy is larger than memset, but trailing
; bytes are known to be undef.


%T = type { i64, i32, i32 }

define void @test_alloca(i8* %result) {
; CHECK-LABEL: @test_alloca(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

define void @test_alloca_with_lifetimes(i8* %result) {
; CHECK-LABEL: @test_alloca_with_lifetimes(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %b)
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %b)
  ret void
}

define void @test_malloc_with_lifetimes(i8* %result) {
; CHECK-LABEL: @test_malloc_with_lifetimes(
; CHECK-NEXT:    [[A:%.*]] = call i8* @malloc(i64 16)
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
; CHECK-NEXT:    call void @free(i8* [[A]])
; CHECK-NEXT:    ret void
;
  %a = call i8* @malloc(i64 16)
  call void @llvm.lifetime.start.p0i8(i64 16, i8* %a)
  call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 16, i8* %a)
  call void @free(i8* %a)
  ret void
}
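
; Note on the positive tests above: this module declares no target datalayout,
; so the byte offsets assume the common default layout, under which %T occupies
; 16 bytes (i64 at offset 0, i32 at offset 8, i32 at offset 12). The memset
; defines bytes [0, 12) and bytes [12, 16) of the allocation are never written,
; so they are undef; the 16-byte memcpy can therefore be forwarded as a 12-byte
; memset of %result, as the CHECK lines show.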

; memcpy size is larger than lifetime, don't optimize.
define void @test_copy_larger_than_lifetime_size(i8* %result) {
; CHECK-LABEL: @test_copy_larger_than_lifetime_size(
; CHECK-NEXT:    [[A:%.*]] = call i8* @malloc(i64 16)
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A]])
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 12, i8* [[A]])
; CHECK-NEXT:    call void @free(i8* [[A]])
; CHECK-NEXT:    ret void
;
  %a = call i8* @malloc(i64 16)
  call void @llvm.lifetime.start.p0i8(i64 12, i8* %a)
  call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 12, i8* %a)
  call void @free(i8* %a)
  ret void
}

; The trailing bytes are not known to be undef, so we can't ignore them.
define void @test_not_undef_memory(i8* %result, i8* %input) {
; CHECK-LABEL: @test_not_undef_memory(
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[INPUT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[INPUT]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.p0i8.i64(i8* align 8 %input, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %input, i64 16, i1 false)
  ret void
}

; Memset is volatile, memcpy is not. Can be optimized.
define void @test_volatile_memset(i8* %result) {
; CHECK-LABEL: @test_volatile_memset(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 true)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

; Memcpy is volatile, memset is not. Cannot be optimized.
define void @test_volatile_memcpy(i8* %result) {
; CHECK-LABEL: @test_volatile_memcpy(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 true)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 true)
  ret void
}
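
; Note on the volatile cases above: the volatile memset is preserved untouched
; and only the non-volatile memcpy is rewritten (see the CHECK lines in
; @test_volatile_memset), so the forwarding is still permitted there; a
; volatile memcpy, by contrast, must be left alone entirely.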

; Write between memset and memcpy, can't optimize.
define void @test_write_between(i8* %result) {
; CHECK-LABEL: @test_write_between(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
; CHECK-NEXT:    store i8 -1, i8* [[B]], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
  store i8 -1, i8* %b
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

; A write prior to the memset, which is part of the memset region.
; We could optimize this, but currently don't, because the used memory location is imprecise.
define void @test_write_before_memset_in_memset_region(i8* %result) {
; CHECK-LABEL: @test_write_before_memset_in_memset_region(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    store i8 -1, i8* [[B]], align 1
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  store i8 -1, i8* %b
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 8, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

; A write prior to the memset, which is part of the memcpy (but not memset) region.
; This cannot be optimized.
define void @test_write_before_memset_in_memcpy_region(i8* %result) {
; CHECK-LABEL: @test_write_before_memset_in_memcpy_region(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 2
; CHECK-NEXT:    store i32 -1, i32* [[C]], align 4
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  %c = getelementptr inbounds %T, %T* %a, i64 0, i32 2
  store i32 -1, i32* %c
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 8, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}
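
; In the test above, the i32 store lands at offset 12 (field 2 of %T): outside
; the 8 bytes cleared by the memset, but inside the 16 bytes read by the
; memcpy, so the copied trailing bytes are not undef. The next test is the
; analogous case where a store to field 1 (offset 8) straddles the end of a
; 10-byte memset, leaving bytes 10-11 defined by the store rather than undef.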

; A write prior to the memset, which is part of both the memset and memcpy regions.
; This cannot be optimized.
define void @test_write_before_memset_in_both_regions(i8* %result) {
; CHECK-LABEL: @test_write_before_memset_in_both_regions(
; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 1
; CHECK-NEXT:    store i32 -1, i32* [[C]], align 4
; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 10, i1 false)
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
; CHECK-NEXT:    ret void
;
  %a = alloca %T, align 8
  %b = bitcast %T* %a to i8*
  %c = getelementptr inbounds %T, %T* %a, i64 0, i32 1
  store i32 -1, i32* %c
  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 10, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
  ret void
}

declare i8* @malloc(i64)
declare void @free(i8*)

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)