; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; NOTE(review): '-instcombine' is the legacy pass-manager spelling; newer opt
; builds may require '-passes=instcombine' - confirm against the LLVM version
; this test targets before changing the command line above.

; or (and c, sext(a <s b)), (and d, not(sext(a <s b)))  -->  select (a <s b), c, d

define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:    [[E:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[E]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %e = icmp slt i32 %a, %b
  %f = sext i1 %e to i32
  %g = and i32 %c, %f
  %h = xor i32 %f, -1
  %i = and i32 %d, %h
  %j = or i32 %g, %i
  ret i32 %j
}

; Same as @foo with the operands of the final 'or' commuted.

define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @bar(
; CHECK-NEXT:    [[E_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[E_NOT]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %e = icmp slt i32 %a, %b
  %f = sext i1 %e to i32
  %g = and i32 %c, %f
  %h = xor i32 %f, -1
  %i = and i32 %d, %h
  %j = or i32 %i, %g
  ret i32 %j
}

; Same pattern as @foo, but the all-ones/zero mask is built with
; 'select i1 %t0, i32 -1, i32 0' instead of a sext.

define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @goo(
; CHECK-NEXT:    [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.0.0, %c
  %not = xor i32 %iftmp.0.0, -1
  %t2 = and i32 %not, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; Both the mask and its inverse are written as selects of -1/0 on the same
; condition (no explicit 'xor ..., -1').

define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @poo(
; CHECK-NEXT:    [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.0.0, %c
  %iftmp = select i1 %t0, i32 0, i32 -1
  %t2 = and i32 %iftmp, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; PR32791 - https://bugs.llvm.org/show_bug.cgi?id=32791
; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
; In the expected output the 'sge' compare becomes 'slt' with swapped select
; arms; both selects and the 'or' are still present.

define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds(
; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[C:%.*]], i32 0
; CHECK-NEXT:    [[CMP2_NOT:%.*]] = icmp slt i32 [[A]], [[B]]
; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2_NOT]], i32 0, i32 [[D:%.*]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret i32 [[OR]]
;
  %cmp1 = icmp slt i32 %a, %b
  %sel1 = select i1 %cmp1, i32 %c, i32 0
  %cmp2 = icmp sge i32 %a, %b
  %sel2 = select i1 %cmp2, i32 %d, i32 0
  %or = or i32 %sel1, %sel2
  ret i32 %or
}

; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
; Same as @fold_inverted_icmp_preds, but the zero is the true-arm of each
; input select instead of the false-arm.

define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds_reverse(
; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 [[C:%.*]]
; CHECK-NEXT:    [[CMP2_NOT:%.*]] = icmp slt i32 [[A]], [[B]]
; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2_NOT]], i32 [[D:%.*]], i32 0
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret i32 [[OR]]
;
  %cmp1 = icmp slt i32 %a, %b
  %sel1 = select i1 %cmp1, i32 0, i32 %c
  %cmp2 = icmp sge i32 %a, %b
  %sel2 = select i1 %cmp2, i32 0, i32 %d
  %or = or i32 %sel1, %sel2
  ret i32 %or
}

; TODO: Should fcmp have the same sort of predicate canonicalization as icmp?
; The expected output for this function is identical to its input: the
; olt/uge compares, both selects, and the 'or' all remain.

define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_fcmp_preds(
; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[C:%.*]], i32 0
; CHECK-NEXT:    [[CMP2:%.*]] = fcmp uge float [[A]], [[B]]
; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], i32 [[D:%.*]], i32 0
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret i32 [[OR]]
;
  %cmp1 = fcmp olt float %a, %b
  %sel1 = select i1 %cmp1, i32 %c, i32 0
  %cmp2 = fcmp uge float %a, %b
  %sel2 = select i1 %cmp2, i32 %d, i32 0
  %or = or i32 %sel1, %sel2
  ret i32 %or
}

; Here the 1st compare/select are canonicalized ('ne' becomes 'eq' with swapped
; select arms), so CSE and another round of instcombine or some other pass will fold this.

define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: @fold_inverted_icmp_vector_preds(
; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq <2 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1_NOT]], <2 x i32> zeroinitializer, <2 x i32> [[C:%.*]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq <2 x i32> [[A]], [[B]]
; CHECK-NEXT:    [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[D:%.*]], <2 x i32> zeroinitializer
; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret <2 x i32> [[OR]]
;
  %cmp1 = icmp ne <2 x i32> %a, %b
  %sel1 = select <2 x i1> %cmp1, <2 x i32> %c, <2 x i32> <i32 0, i32 0>
  %cmp2 = icmp eq <2 x i32> %a, %b
  %sel2 = select <2 x i1> %cmp2, <2 x i32> %d, <2 x i32> <i32 0, i32 0>
  %or = or <2 x i32> %sel1, %sel2
  ret <2 x i32> %or
}

; Same instruction pattern as @goo; only the name of the select-built mask
; value differs (%iftmp.1.0).

define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @par(
; CHECK-NEXT:    [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.1.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.1.0, %c
  %not = xor i32 %iftmp.1.0, -1
  %t2 = and i32 %not, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; In the following tests (8 commutation variants), verify that a bitcast doesn't get
; in the way of a select transform. These bitcasts are common in SSE/AVX and possibly
; other vector code because of canonicalization to i64 elements for vectors.

; The fptosi instructions are included to avoid commutation canonicalization based on
; operator weight. Using another cast operator ensures that both operands of all logic
; ops are equally weighted, and this ensures that we're testing all commutation
; possibilities.

; Variant 0: and1 = (mask, a), and2 = (~mask, b), or = (and1, and2).

define <2 x i64> @bitcast_select_swap0(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap0(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}

; Variant 1: and1 = (mask, a), and2 = (~mask, b), or = (and2, and1).

define <2 x i64> @bitcast_select_swap1(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap1(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; Variant 2: and1 = (mask, a), and2 = (b, ~mask), or = (and1, and2).

define <2 x i64> @bitcast_select_swap2(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap2(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}

; Variant 3: and1 = (mask, a), and2 = (b, ~mask), or = (and2, and1).

define <2 x i64> @bitcast_select_swap3(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap3(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; Variant 4: and1 = (a, mask), and2 = (~mask, b), or = (and1, and2).

define <2 x i64> @bitcast_select_swap4(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap4(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}

; Variant 5: and1 = (a, mask), and2 = (~mask, b), or = (and2, and1).

define <2 x i64> @bitcast_select_swap5(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap5(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; Variant 6: and1 = (a, mask), and2 = (b, ~mask), or = (and1, and2).

define <2 x i64> @bitcast_select_swap6(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap6(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}

; Variant 7: and1 = (a, mask), and2 = (b, ~mask), or = (and2, and1).

define <2 x i64> @bitcast_select_swap7(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap7(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; %and2 and %bc2 each have an extra use (the 'add'). The expected output keeps
; the and/and/or sequence instead of forming a select.

define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_select_multi_uses(
; CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP:%.*]] to <4 x i32>
; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i64> [[BC1]], [[A:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT:    [[BC2:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[BC2]], [[B:%.*]]
; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i64> [[OR]], [[ADD]]
; CHECK-NEXT:    ret <2 x i64> [[SUB]]
;
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %a, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %b, %bc2
  %or = or <2 x i64> %and2, %and1
  %add = add <2 x i64> %and2, %bc2
  %sub = sub <2 x i64> %or, %add
  ret <2 x i64> %sub
}

; Plain i1 operands: (~c & a) | (c & b)  -->  select c, b, a

define i1 @bools(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools(
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]]
; CHECK-NEXT:    ret i1 [[TMP1]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  ret i1 %or
}

; Form a select if we know we can replace 2 simple logic ops.
; Here %and1 has a second use (the 'xor'), so %not and %and1 remain in the
; expected output next to the new select.

define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses1(
; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT:    [[AND1:%.*]] = and i1 [[NOT]], [[A:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 [[A]]
; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[TMP1]], [[AND1]]
; CHECK-NEXT:    ret i1 [[XOR]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  %xor = xor i1 %or, %and1
  ret i1 %xor
}

; Don't replace a cheap logic op with a potentially expensive select
; unless we can also eliminate one of the other original ops.
; In this function %and1 and %and2 each have a second use (the 'add'); the
; expected output is nevertheless a single 'select c, b, a'.

define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses2(
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]]
; CHECK-NEXT:    ret i1 [[TMP1]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  %add = add i1 %and1, %and2
  %and3 = and i1 %or, %add
  ret i1 %and3
}

; Vector-of-i1 version of the (~c & a) | (c & b) pattern, with the operands of
; %and2 and of the 'or' commuted.

define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_of_bools(
; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[B:%.*]], <4 x i1> [[A:%.*]]
; CHECK-NEXT:    ret <4 x i1> [[TMP1]]
;
  %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
  %and1 = and <4 x i1> %not, %a
  %and2 = and <4 x i1> %b, %c
  %or = or <4 x i1> %and2, %and1
  ret <4 x i1> %or
}

; The <4 x i1> condition and its inverse are bitcast to i4 before masking. The
; expected output bitcasts the i4 operands to <4 x i1>, selects on %c, and
; bitcasts the result back to i4.

define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_of_casted_bools(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i4 [[A:%.*]] to <4 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i4 [[B:%.*]] to <4 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP2]], <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
; CHECK-NEXT:    ret i4 [[TMP4]]
;
  %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
  %bc1 = bitcast <4 x i1> %not to i4
  %bc2 = bitcast <4 x i1> %c to i4
  %and1 = and i4 %a, %bc1
  %and2 = and i4 %bc2, %b
  %or = or i4 %and1, %and2
  ret i4 %or
}

; Inverted 'and' constants mean this is a select which is canonicalized to a shuffle.
; Lanes 0 and 3 are taken from %a and lanes 1 and 2 from %b, which is the
; shuffle mask <0, 5, 6, 3> in the expected output.

define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_sel_consts(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; Same idea with an unusual element type/count (<3 x i129>) and commuted 'or'
; operands.

define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
; CHECK-LABEL: @vec_sel_consts_weird(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i129> [[A:%.*]], <3 x i129> [[B:%.*]], <3 x i32> <i32 0, i32 4, i32 2>
; CHECK-NEXT:    ret <3 x i129> [[TMP1]]
;
  %and1 = and <3 x i129> %a, <i129 -1, i129 0, i129 -1>
  %and2 = and <3 x i129> %b, <i129 0, i129 -1, i129 0>
  %or = or <3 x i129> %and2, %and1
  ret <3 x i129> %or
}

; The mask elements must be inverted for this to be a select.
; Lane 2 is 0 in both constants, so the two masks are not inverses and the
; expected output is unchanged from the input.

define <4 x i32> @vec_not_sel_consts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_not_sel_consts(
; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1, i32 0, i32 0, i32 0>
; CHECK-NEXT:    [[AND2:%.*]] = and <4 x i32> [[B:%.*]], <i32 0, i32 -1, i32 0, i32 -1>
; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
; CHECK-NEXT:    ret <4 x i32> [[OR]]
;
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; Same as @vec_not_sel_consts, but with undef elements in the mask constants.
; The expected output is again unchanged from the input.

define <4 x i32> @vec_not_sel_consts_undef_elts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_not_sel_consts_undef_elts(
; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1, i32 undef, i32 0, i32 0>
; CHECK-NEXT:    [[AND2:%.*]] = and <4 x i32> [[B:%.*]], <i32 0, i32 -1, i32 0, i32 undef>
; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
; CHECK-NEXT:    ret <4 x i32> [[OR]]
;
  %and1 = and <4 x i32> %a, <i32 -1, i32 undef, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 undef>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; The inverted constants may be operands of xor instructions.
; The expected select condition is %c with lanes 1-3 flipped, i.e. the i1
; form of %not_mask_flip1.

define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_sel_xor(
; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], <i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %mask = sext <4 x i1> %c to <4 x i32>
  %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
  %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
  %and1 = and <4 x i32> %not_mask_flip1, %a
  %and2 = and <4 x i32> %mask_flip1, %b
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; Allow the transform even if the mask values have multiple uses because
; there's still a net reduction of instructions from removing the and/and/or.
; In this function %mask_flip1 has a second use (the 'add'), so the expected
; output keeps a sign-extended copy of it alongside the select.

define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_sel_xor_multi_use(
; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], <i1 true, i1 false, i1 false, i1 false>
; CHECK-NEXT:    [[MASK_FLIP1:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[C]], <i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[TMP3]], [[MASK_FLIP1]]
; CHECK-NEXT:    ret <4 x i32> [[ADD]]
;
  %mask = sext <4 x i1> %c to <4 x i32>
  %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
  %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
  %and1 = and <4 x i32> %not_mask_flip1, %a
  %and2 = and <4 x i32> %mask_flip1, %b
  %or = or <4 x i32> %and1, %and2
  %add = add <4 x i32> %or, %mask_flip1
  ret <4 x i32> %add
}

; The 'ashr' guarantees that we have a bitmask, so this is a select with a truncated condition.
; The expected output tests the sign bit directly: select (cond <s 0), tval, fval.

define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
; CHECK-LABEL: @allSignBits(
; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp slt i32 [[COND:%.*]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]]
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %bitmask = ashr i32 %cond, 31
  %not_bitmask = xor i32 %bitmask, -1
  %a1 = and i32 %tval, %bitmask
  %a2 = and i32 %not_bitmask, %fval
  %sel = or i32 %a1, %a2
  ret i32 %sel
}

; Vector version with commuted operands. The expected output uses the inverted
; compare (cond >s -1) with the select arms swapped: fval when true, tval
; when false.

define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
; CHECK-LABEL: @allSignBits_vec(
; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]]
; CHECK-NEXT:    ret <4 x i8> [[TMP1]]
;
  %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
  %not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
  %a1 = and <4 x i8> %tval, %bitmask
  %a2 = and <4 x i8> %fval, %not_bitmask
  %sel = or <4 x i8> %a2, %a1
  ret <4 x i8> %sel
}

; Negative test - make sure that bitcasts from FP do not cause a crash.
; The bitcast operands here come from FP values, not from a sign-extended bool
; mask; the expected output keeps the and/and/or sequence as written.

define <2 x i64> @fp_bitcast(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @fp_bitcast(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[BC1:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i64> [[SIA]], [[BC1]]
; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[SIB]], [[BC2]]
; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %bc1 = bitcast <2 x double> %a to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %bc2 = bitcast <2 x double> %b to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; The mask is a sign-extended compare that goes through single-input shuffles
; and 'or's before being used. The expected output truncates that mask to
; <4 x i1>, selects between the original float vectors %z and %x, and bitcasts
; the result to <4 x i32>.

define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: @computesignbits_through_shuffles(
; CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <4 x float> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
; CHECK-NEXT:    [[SHUF_OR1:%.*]] = or <4 x i32> [[S1]], [[S2]]
; CHECK-NEXT:    [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT:    [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
; CHECK-NEXT:    [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]]
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i32> [[SHUF_OR2]] to <4 x i1>
; CHECK-NEXT:    [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[DOTV]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %cmp = fcmp ole <4 x float> %x, %y
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %s1 = shufflevector <4 x i32> %sext, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %s2 = shufflevector <4 x i32> %sext, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %shuf_or1 = or <4 x i32> %s1, %s2
  %s3 = shufflevector <4 x i32> %shuf_or1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %s4 = shufflevector <4 x i32> %shuf_or1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %shuf_or2 = or <4 x i32> %s3, %s4
  %not_or2 = xor <4 x i32> %shuf_or2, <i32 -1, i32 -1, i32 -1, i32 -1>
  %xbc = bitcast <4 x float> %x to <4 x i32>
  %zbc = bitcast <4 x float> %z to <4 x i32>
  %and1 = and <4 x i32> %not_or2, %xbc
  %and2 = and <4 x i32> %shuf_or2, %zbc
  %sel = or <4 x i32> %and1, %and2
  ret <4 x i32> %sel
}

; The mask is a two-input shuffle of two sign-extended i1 vectors. The
; expected output truncates the shuffled mask to <4 x i1> and selects
; %y (true) or %x (false).

define <4 x i32> @computesignbits_through_two_input_shuffle(<4 x i32> %x, <4 x i32> %y, <4 x i1> %cond1, <4 x i1> %cond2) {
; CHECK-LABEL: @computesignbits_through_two_input_shuffle(
; CHECK-NEXT:    [[SEXT1:%.*]] = sext <4 x i1> [[COND1:%.*]] to <4 x i32>
; CHECK-NEXT:    [[SEXT2:%.*]] = sext <4 x i1> [[COND2:%.*]] to <4 x i32>
; CHECK-NEXT:    [[COND:%.*]] = shufflevector <4 x i32> [[SEXT1]], <4 x i32> [[SEXT2]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i32> [[COND]] to <4 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %sext1 = sext <4 x i1> %cond1 to <4 x i32>
  %sext2 = sext <4 x i1> %cond2 to <4 x i32>
  %cond = shufflevector <4 x i32> %sext1, <4 x i32> %sext2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %notcond = xor <4 x i32> %cond, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and1 = and <4 x i32> %notcond, %x
  %and2 = and <4 x i32> %cond, %y
  %sel = or <4 x i32> %and1, %and2
  ret <4 x i32> %sel
}
