; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -early-cse < %s | FileCheck %s

; Unequal mask check.

; Load-load: the second load can be removed if (assuming unequal masks) the
; second loaded value is a subset of the first loaded value considering the
; non-undef vector elements. In other words, if the second mask is a submask
; of the first one, and the through value of the second load is undef.

; Load-load, second mask is a submask of the first, second through is undef.
; Expect the second load to be removed.
define <4 x i32> @f3(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f3(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[V2:%.*]] = add <4 x i32> [[V0]], [[V0]]
; CHECK-NEXT:    ret <4 x i32> [[V2]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1)
  %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef)
  %v2 = add <4 x i32> %v0, %v1
  ret <4 x i32> %v2
}

; Load-load, second mask is a submask of the first, second through is not undef.
; Expect the second load to remain.
define <4 x i32> @f4(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f4(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
; CHECK-NEXT:    [[V2:%.*]] = add <4 x i32> [[V0]], [[V1]]
; CHECK-NEXT:    ret <4 x i32> [[V2]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1)
  %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
  %v2 = add <4 x i32> %v0, %v1
  ret <4 x i32> %v2
}

; Load-load, second mask is not a submask of the first, second through is undef.
; Expect the second load to remain.
; NOTE: @f5 is meant to test the "not a submask" case: the second load's mask
; has element 2 enabled where the first load's mask does not, so the second
; load reads memory the first did not, and must not be CSE'd away — even
; though its through value is undef.
define <4 x i32> @f5(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f5(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef)
; CHECK-NEXT:    [[V2:%.*]] = add <4 x i32> [[V0]], [[V1]]
; CHECK-NEXT:    ret <4 x i32> [[V2]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1)
  %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef)
  %v2 = add <4 x i32> %v0, %v1
  ret <4 x i32> %v2
}

; Store-store: the first store can be removed if the first mask is a submask
; of the second mask.

; Store-store, first mask is a submask of the second.
; Expect the first store to be removed.
define void @f6(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @f6(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  ret void
}

; Store-store, first mask is not a submask of the second.
; Expect both stores to remain.
define void @f7(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @f7(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0]], <4 x i32>* [[A1]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
  ret void
}

; Load-store: the store can be removed if the store's mask is a submask of the
; load's mask.

; Load-store, second mask is a submask of the first.
; Expect the store to be removed.
define <4 x i32> @f8(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f8(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    ret <4 x i32> [[V0]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v0, <4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
  ret <4 x i32> %v0
}

; Load-store, second mask is not a submask of the first.
; Expect the store to remain.
define <4 x i32> @f9(<4 x i32>* %a0, <4 x i32> %a1) {
; CHECK-LABEL: @f9(
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[V0]], <4 x i32>* [[A0]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    ret <4 x i32> [[V0]]
;
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v0, <4 x i32>* %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  ret <4 x i32> %v0
}

; Store-load: the load can be removed if load's mask is a submask of the
; store's mask, and the load's through value is undef.

; Store-load, load's mask is a submask of store's mask, thru is undef.
; Expect the load to be removed.
define <4 x i32> @fa(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @fa(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    ret <4 x i32> [[A0]]
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef)
  ret <4 x i32> %v0
}

; Store-load, load's mask is a submask of store's mask, thru is not undef.
; Expect the load to remain.
define <4 x i32> @fb(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @fb(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A1]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
; CHECK-NEXT:    ret <4 x i32> [[V0]]
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>)
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
  ret <4 x i32> %v0
}

; Store-load, load's mask is not a submask of store's mask, thru is undef.
; Expect the load to remain.
define <4 x i32> @fc(<4 x i32> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: @fc(
; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
; CHECK-NEXT:    [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[A1]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef)
; CHECK-NEXT:    ret <4 x i32> [[V0]]
;
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a0, <4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>)
  %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef)
  ret <4 x i32> %v0
}

declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)