; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+sse2 | FileCheck %s
; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64--- -mattr=+avx | FileCheck %s

; Both invocations above use the default FileCheck prefix, so the SSE2 and AVX
; configurations are expected to produce the same output.

;
; Check that we can commute operands based on the predicate.
;
; Every function in this group builds a <4 x i1> from four scalar compares that
; all use the same predicate. Lanes 0 and 3 compare (%a[i], %b[i]); lanes 1 and
; 2 compare (%b[i], %a[i]). The assertions expect a single vector load of %b
; and a single vector compare against %a.
;

; Integer equality with mixed operand order.
define <4 x i32> @icmp_eq_v4i32(<4 x i32> %a, i32* %b) {
; CHECK-LABEL: @icmp_eq_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %p0 = getelementptr inbounds i32, i32* %b, i32 0
  %p1 = getelementptr inbounds i32, i32* %b, i32 1
  %p2 = getelementptr inbounds i32, i32* %b, i32 2
  %p3 = getelementptr inbounds i32, i32* %b, i32 3
  %b0 = load i32, i32* %p0, align 4
  %b1 = load i32, i32* %p1, align 4
  %b2 = load i32, i32* %p2, align 4
  %b3 = load i32, i32* %p3, align 4
  %c0 = icmp eq i32 %a0, %b0
  %c1 = icmp eq i32 %b1, %a1
  %c2 = icmp eq i32 %b2, %a2
  %c3 = icmp eq i32 %a3, %b3
  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
  %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
  %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
  %r = sext <4 x i1> %d3 to <4 x i32>
  ret <4 x i32> %r
}

; Integer inequality with mixed operand order.
define <4 x i32> @icmp_ne_v4i32(<4 x i32> %a, i32* %b) {
; CHECK-LABEL: @icmp_ne_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %p0 = getelementptr inbounds i32, i32* %b, i32 0
  %p1 = getelementptr inbounds i32, i32* %b, i32 1
  %p2 = getelementptr inbounds i32, i32* %b, i32 2
  %p3 = getelementptr inbounds i32, i32* %b, i32 3
  %b0 = load i32, i32* %p0, align 4
  %b1 = load i32, i32* %p1, align 4
  %b2 = load i32, i32* %p2, align 4
  %b3 = load i32, i32* %p3, align 4
  %c0 = icmp ne i32 %a0, %b0
  %c1 = icmp ne i32 %b1, %a1
  %c2 = icmp ne i32 %b2, %a2
  %c3 = icmp ne i32 %a3, %b3
  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
  %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
  %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
  %r = sext <4 x i1> %d3 to <4 x i32>
  ret <4 x i32> %r
}

; Ordered floating-point equality with mixed operand order.
define <4 x i32> @fcmp_oeq_v4i32(<4 x float> %a, float* %b) {
; CHECK-LABEL: @fcmp_oeq_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oeq <4 x float> [[TMP2]], [[A:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %p0 = getelementptr inbounds float, float* %b, i32 0
  %p1 = getelementptr inbounds float, float* %b, i32 1
  %p2 = getelementptr inbounds float, float* %b, i32 2
  %p3 = getelementptr inbounds float, float* %b, i32 3
  %b0 = load float, float* %p0, align 4
  %b1 = load float, float* %p1, align 4
  %b2 = load float, float* %p2, align 4
  %b3 = load float, float* %p3, align 4
  %c0 = fcmp oeq float %a0, %b0
  %c1 = fcmp oeq float %b1, %a1
  %c2 = fcmp oeq float %b2, %a2
  %c3 = fcmp oeq float %a3, %b3
  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
  %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
  %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
  %r = sext <4 x i1> %d3 to <4 x i32>
  ret <4 x i32> %r
}

; Floating-point "unordered" test with mixed operand order.
define <4 x i32> @fcmp_uno_v4i32(<4 x float> %a, float* %b) {
; CHECK-LABEL: @fcmp_uno_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp uno <4 x float> [[TMP2]], [[A:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %p0 = getelementptr inbounds float, float* %b, i32 0
  %p1 = getelementptr inbounds float, float* %b, i32 1
  %p2 = getelementptr inbounds float, float* %b, i32 2
  %p3 = getelementptr inbounds float, float* %b, i32 3
  %b0 = load float, float* %p0, align 4
  %b1 = load float, float* %p1, align 4
  %b2 = load float, float* %p2, align 4
  %b3 = load float, float* %p3, align 4
  %c0 = fcmp uno float %a0, %b0
  %c1 = fcmp uno float %b1, %a1
  %c2 = fcmp uno float %b2, %a2
  %c3 = fcmp uno float %a3, %b3
  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
  %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
  %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
  %r = sext <4 x i1> %d3 to <4 x i32>
  ret <4 x i32> %r
}

;
; Check that we can commute operands by swapping the predicate.
;

; Signed integer compares: lanes 0 and 3 use `sgt` on (%a[i], %b[i]), lanes 1
; and 2 use `slt` on (%b[i], %a[i]). The assertions expect one vector `slt`
; with the loaded %b vector on the left and %a on the right.
define <4 x i32> @icmp_sgt_slt_v4i32(<4 x i32> %a, i32* %b) {
; CHECK-LABEL: @icmp_sgt_slt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %p0 = getelementptr inbounds i32, i32* %b, i32 0
  %p1 = getelementptr inbounds i32, i32* %b, i32 1
  %p2 = getelementptr inbounds i32, i32* %b, i32 2
  %p3 = getelementptr inbounds i32, i32* %b, i32 3
  %b0 = load i32, i32* %p0, align 4
  %b1 = load i32, i32* %p1, align 4
  %b2 = load i32, i32* %p2, align 4
  %b3 = load i32, i32* %p3, align 4
  %c0 = icmp sgt i32 %a0, %b0
  %c1 = icmp slt i32 %b1, %a1
  %c2 = icmp slt i32 %b2, %a2
  %c3 = icmp sgt i32 %a3, %b3
  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
  %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
  %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
  %r = sext <4 x i1> %d3 to <4 x i32>
  ret <4 x i32> %r
}

; Unsigned integer compares: `uge` on (%a[i], %b[i]) mixed with `ule` on
; (%b[i], %a[i]). The assertions expect one vector `ule`.
define <4 x i32> @icmp_uge_ule_v4i32(<4 x i32> %a, i32* %b) {
; CHECK-LABEL: @icmp_uge_ule_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[TMP2]], [[A:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %p0 = getelementptr inbounds i32, i32* %b, i32 0
  %p1 = getelementptr inbounds i32, i32* %b, i32 1
  %p2 = getelementptr inbounds i32, i32* %b, i32 2
  %p3 = getelementptr inbounds i32, i32* %b, i32 3
  %b0 = load i32, i32* %p0, align 4
  %b1 = load i32, i32* %p1, align 4
  %b2 = load i32, i32* %p2, align 4
  %b3 = load i32, i32* %p3, align 4
  %c0 = icmp uge i32 %a0, %b0
  %c1 = icmp ule i32 %b1, %a1
  %c2 = icmp ule i32 %b2, %a2
  %c3 = icmp uge i32 %a3, %b3
  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
  %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
  %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
  %r = sext <4 x i1> %d3 to <4 x i32>
  ret <4 x i32> %r
}

; Ordered floating-point compares: `ogt` on (%a[i], %b[i]) mixed with `olt` on
; (%b[i], %a[i]). The assertions expect one vector `olt`.
define <4 x i32> @fcmp_ogt_olt_v4i32(<4 x float> %a, float* %b) {
; CHECK-LABEL: @fcmp_ogt_olt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[TMP2]], [[A:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %p0 = getelementptr inbounds float, float* %b, i32 0
  %p1 = getelementptr inbounds float, float* %b, i32 1
  %p2 = getelementptr inbounds float, float* %b, i32 2
  %p3 = getelementptr inbounds float, float* %b, i32 3
  %b0 = load float, float* %p0, align 4
  %b1 = load float, float* %p1, align 4
  %b2 = load float, float* %p2, align 4
  %b3 = load float, float* %p3, align 4
  %c0 = fcmp ogt float %a0, %b0
  %c1 = fcmp olt float %b1, %a1
  %c2 = fcmp olt float %b2, %a2
  %c3 = fcmp ogt float %a3, %b3
  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
  %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
  %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
  %r = sext <4 x i1> %d3 to <4 x i32>
  ret <4 x i32> %r
}

; Negative test: `ord` and `uno` are logical inverses of each other, not
; operand-swapped forms, so swapping operands cannot unify the four lanes.
; The assertions expect the scalar loads, compares and insertelements to
; remain. The only differences from the input are the missing zero-index GEP
; and the i64 GEP indices (presumably instcombine cleanups).
define <4 x i32> @fcmp_ord_uno_v4i32(<4 x float> %a, float* %b) {
; CHECK-LABEL: @fcmp_ord_uno_v4i32(
; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT:    [[B0:%.*]] = load float, float* [[B]], align 4
; CHECK-NEXT:    [[B1:%.*]] = load float, float* [[P1]], align 4
; CHECK-NEXT:    [[B2:%.*]] = load float, float* [[P2]], align 4
; CHECK-NEXT:    [[B3:%.*]] = load float, float* [[P3]], align 4
; CHECK-NEXT:    [[C0:%.*]] = fcmp ord float [[A0]], [[B0]]
; CHECK-NEXT:    [[C1:%.*]] = fcmp uno float [[B1]], [[A1]]
; CHECK-NEXT:    [[C2:%.*]] = fcmp uno float [[B2]], [[A2]]
; CHECK-NEXT:    [[C3:%.*]] = fcmp ord float [[A3]], [[B3]]
; CHECK-NEXT:    [[D0:%.*]] = insertelement <4 x i1> undef, i1 [[C0]], i32 0
; CHECK-NEXT:    [[D1:%.*]] = insertelement <4 x i1> [[D0]], i1 [[C1]], i32 1
; CHECK-NEXT:    [[D2:%.*]] = insertelement <4 x i1> [[D1]], i1 [[C2]], i32 2
; CHECK-NEXT:    [[D3:%.*]] = insertelement <4 x i1> [[D2]], i1 [[C3]], i32 3
; CHECK-NEXT:    [[R:%.*]] = sext <4 x i1> [[D3]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %p0 = getelementptr inbounds float, float* %b, i32 0
  %p1 = getelementptr inbounds float, float* %b, i32 1
  %p2 = getelementptr inbounds float, float* %b, i32 2
  %p3 = getelementptr inbounds float, float* %b, i32 3
  %b0 = load float, float* %p0, align 4
  %b1 = load float, float* %p1, align 4
  %b2 = load float, float* %p2, align 4
  %b3 = load float, float* %p3, align 4
  %c0 = fcmp ord float %a0, %b0
  %c1 = fcmp uno float %b1, %a1
  %c2 = fcmp uno float %b2, %a2
  %c3 = fcmp ord float %a3, %b3
  %d0 = insertelement <4 x i1> undef, i1 %c0, i32 0
  %d1 = insertelement <4 x i1> %d0, i1 %c1, i32 1
  %d2 = insertelement <4 x i1> %d1, i1 %c2, i32 2
  %d3 = insertelement <4 x i1> %d2, i1 %c3, i32 3
  %r = sext <4 x i1> %d3 to <4 x i32>
  ret <4 x i32> %r
}