; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Can't get smaller than this.

define <2 x i1> @trunc(<2 x i64> %a) {
; CHECK-LABEL: @trunc(
; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT:    ret <2 x i1> [[T]]
;
  %t = trunc <2 x i64> %a to <2 x i1>
  ret <2 x i1> %t
}

; This is trunc: (and X, 1) != 0 --> trunc X to i1.

define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc(
; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %t = and <2 x i64> %a, <i64 1, i64 1>
  %r = icmp ne <2 x i64> %t, zeroinitializer
  ret <2 x i1> %r
}

; This is trunc even with an undef element in the 'and' mask.

define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elt(
; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %t = and <2 x i64> %a, <i64 undef, i64 1>
  %r = icmp ne <2 x i64> %t, zeroinitializer
  ret <2 x i1> %r
}

; TODO: This could be just 1 instruction (trunc), but our undef matching is incomplete.

define <2 x i1> @and_cmp_is_trunc_even_with_undef_elts(<2 x i64> %a) {
; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elts(
; CHECK-NEXT:    [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 undef, i64 1>
; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 undef, i64 0>
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %t = and <2 x i64> %a, <i64 undef, i64 1>
  %r = icmp ne <2 x i64> %t, <i64 undef, i64 0>
  ret <2 x i1> %r
}

; The ashr turns into an lshr.
define <2 x i64> @test2(<2 x i64> %a) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[B]], <i64 32767, i64 32767>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %b = and <2 x i64> %a, <i64 65535, i64 65535>
  %t = ashr <2 x i64> %b, <i64 1, i64 1>
  ret <2 x i64> %t
}

; (sext (fcmp ord a)) & (sext (fcmp ord b)) --> sext (fcmp ord a, b);
; the surrounding bitcasts do not block the fold.

define <2 x i64> @test3(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[AND:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[CONV]]
;
  %cmp = fcmp ord <4 x float> %a, zeroinitializer
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp4 = fcmp ord <4 x float> %b, zeroinitializer
  %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
  %and = and <4 x i32> %sext, %sext5
  %conv = bitcast <4 x i32> %and to <2 x i64>
  ret <2 x i64> %conv
}

; Same as above with 'uno' and 'or'.

define <2 x i64> @test4(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[OR:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[OR]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[CONV]]
;
  %cmp = fcmp uno <4 x float> %a, zeroinitializer
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp4 = fcmp uno <4 x float> %b, zeroinitializer
  %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
  %or = or <4 x i32> %sext, %sext5
  %conv = bitcast <4 x i32> %or to <2 x i64>
  ret <2 x i64> %conv
}

; rdar://7434900
; The compares can't be merged, but the logic op moves before the sexts.

define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:    [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
; CHECK-NEXT:    [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i1> [[CMP]], [[CMP4]]
; CHECK-NEXT:    [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[CONV]]
;
  %cmp = fcmp ult <4 x float> %a, zeroinitializer
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
  %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
  %and = and <4 x i32> %sext, %sext5
  %conv = bitcast <4 x i32> %and to <2 x i64>
  ret <2 x i64> %conv
}

define <2 x i64> @test6(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
; CHECK-NEXT:    [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
; CHECK-NEXT:    [[AND1:%.*]] = or <4 x i1> [[CMP]], [[CMP4]]
; CHECK-NEXT:    [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[CONV]]
;
  %cmp = fcmp ult <4 x float> %a, zeroinitializer
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
  %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
  %and = or <4 x i32> %sext, %sext5
  %conv = bitcast <4 x i32> %and to <2 x i64>
  ret <2 x i64> %conv
}

define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
; CHECK-NEXT:    [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
; CHECK-NEXT:    [[AND1:%.*]] = xor <4 x i1> [[CMP]], [[CMP4]]
; CHECK-NEXT:    [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
; CHECK-NEXT:    [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[CONV]]
;
  %cmp = fcmp ult <4 x float> %a, zeroinitializer
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
  %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
  %and = xor <4 x i32> %sext, %sext5
  %conv = bitcast <4 x i32> %and to <2 x i64>
  ret <2 x i64> %conv
}

; Negative test: the trunc/add/store sequence is already minimal.

define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) {
; CHECK-LABEL: @convert(
; CHECK-NEXT:    [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32>
; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[VAL]], <i32 1, i32 1>
; CHECK-NEXT:    store <2 x i32> [[ADD]], <2 x i32>* [[DST_ADDR:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %val = trunc <2 x i64> %src to <2 x i32>
  %add = add <2 x i32> %val, <i32 1, i32 1>
  store <2 x i32> %add, <2 x i32>* %dst.addr
  ret void
}

; trunc+zext --> mask in the wider type when the total width grows.

define <2 x i65> @foo(<2 x i64> %t) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:    [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], <i64 4294967295, i64 4294967295>
; CHECK-NEXT:    [[B:%.*]] = zext <2 x i64> [[A_MASK]] to <2 x i65>
; CHECK-NEXT:    ret <2 x i65> [[B]]
;
  %a = trunc <2 x i64> %t to <2 x i32>
  %b = zext <2 x i32> %a to <2 x i65>
  ret <2 x i65> %b
}

; trunc+zext --> trunc to the final type, then mask.

define <2 x i64> @bar(<2 x i65> %t) {
; CHECK-LABEL: @bar(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64>
; CHECK-NEXT:    [[B:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
; CHECK-NEXT:    ret <2 x i64> [[B]]
;
  %a = trunc <2 x i65> %t to <2 x i32>
  %b = zext <2 x i32> %a to <2 x i64>
  ret <2 x i64> %b
}

; Negative test: trunc from i65 + sext is not simplified.

define <2 x i64> @bars(<2 x i65> %t) {
; CHECK-LABEL: @bars(
; CHECK-NEXT:    [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i32>
; CHECK-NEXT:    [[B:%.*]] = sext <2 x i32> [[A]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[B]]
;
  %a = trunc <2 x i65> %t to <2 x i32>
  %b = sext <2 x i32> %a to <2 x i64>
  ret <2 x i64> %b
}

; trunc+sext within the same width --> shl + exact ashr.

define <2 x i64> @quxs(<2 x i64> %t) {
; CHECK-LABEL: @quxs(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], <i64 32, i64 32>
; CHECK-NEXT:    ret <2 x i64> [[B]]
;
  %a = trunc <2 x i64> %t to <2 x i32>
  %b = sext <2 x i32> %a to <2 x i64>
  ret <2 x i64> %b
}

; Same shl+ashr pair written directly; only 'exact' is inferred on the ashr.

define <2 x i64> @quxt(<2 x i64> %t) {
; CHECK-LABEL: @quxt(
; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 32, i64 32>
; CHECK-NEXT:    ret <2 x i64> [[B]]
;
  %a = shl <2 x i64> %t, <i64 32, i64 32>
  %b = ashr <2 x i64> %a, <i64 32, i64 32>
  ret <2 x i64> %b
}

; Negative tests: FP narrowing/widening round-trips are not removed
; (the narrowing can change the value), per the CHECK lines below.

define <2 x double> @fa(<2 x double> %t) {
; CHECK-LABEL: @fa(
; CHECK-NEXT:    [[A:%.*]] = fptrunc <2 x double> [[T:%.*]] to <2 x float>
; CHECK-NEXT:    [[B:%.*]] = fpext <2 x float> [[A]] to <2 x double>
; CHECK-NEXT:    ret <2 x double> [[B]]
;
  %a = fptrunc <2 x double> %t to <2 x float>
  %b = fpext <2 x float> %a to <2 x double>
  ret <2 x double> %b
}

define <2 x double> @fb(<2 x double> %t) {
; CHECK-LABEL: @fb(
; CHECK-NEXT:    [[A:%.*]] = fptoui <2 x double> [[T:%.*]] to <2 x i64>
; CHECK-NEXT:    [[B:%.*]] = uitofp <2 x i64> [[A]] to <2 x double>
; CHECK-NEXT:    ret <2 x double> [[B]]
;
  %a = fptoui <2 x double> %t to <2 x i64>
  %b = uitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %b
}

define <2 x double> @fc(<2 x double> %t) {
; CHECK-LABEL: @fc(
; CHECK-NEXT:    [[A:%.*]] = fptosi <2 x double> [[T:%.*]] to <2 x i64>
; CHECK-NEXT:    [[B:%.*]] = sitofp <2 x i64> [[A]] to <2 x double>
; CHECK-NEXT:    ret <2 x double> [[B]]
;
  %a = fptosi <2 x double> %t to <2 x i64>
  %b = sitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %b
}

; PR9228
; Everything feeding the (unused) vector math is dead; only 'ret undef' remains.

define <4 x float> @f(i32 %a) {
; CHECK-LABEL: @f(
; CHECK-NEXT:    ret <4 x float> undef
;
  %dim = insertelement <4 x i32> undef, i32 %a, i32 0
  %dim30 = insertelement <4 x i32> %dim, i32 %a, i32 1
  %dim31 = insertelement <4 x i32> %dim30, i32 %a, i32 2
  %dim32 = insertelement <4 x i32> %dim31, i32 %a, i32 3

  %offset_ptr = getelementptr <4 x float>, <4 x float>* null, i32 1
  %offset_int = ptrtoint <4 x float>* %offset_ptr to i64
  %sizeof32 = trunc i64 %offset_int to i32

  %smearinsert33 = insertelement <4 x i32> undef, i32 %sizeof32, i32 0
  %smearinsert34 = insertelement <4 x i32> %smearinsert33, i32 %sizeof32, i32 1
  %smearinsert35 = insertelement <4 x i32> %smearinsert34, i32 %sizeof32, i32 2
  %smearinsert36 = insertelement <4 x i32> %smearinsert35, i32 %sizeof32, i32 3

  %delta_scale = mul <4 x i32> %dim32, %smearinsert36
  %offset_delta = add <4 x i32> zeroinitializer, %delta_scale

  %offset_varying_delta = add <4 x i32> %offset_delta, undef

  ret <4 x float> undef
}

; (x une 0) | (x ueq 0) covers all predicates --> all-ones.

define <8 x i32> @pr24458(<8 x float> %n) {
; CHECK-LABEL: @pr24458(
; CHECK-NEXT:    ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
;
  %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer
  %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer
  %notequal_b_load__to_boolvec.i = sext <8 x i1> %notequal_b_load_.i to <8 x i32>
  %equal_a_load72__to_boolvec.i = sext <8 x i1> %equal_a_load72_.i to <8 x i32>
  %wrong = or <8 x i32> %notequal_b_load__to_boolvec.i, %equal_a_load72__to_boolvec.i
  ret <8 x i32> %wrong
}

; Hoist a trunc to a scalar if we're inserting into an undef vector.
; trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index

define <3 x i16> @trunc_inselt_undef(i32 %x) {
; CHECK-LABEL: @trunc_inselt_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
; CHECK-NEXT:    [[TRUNC:%.*]] = insertelement <3 x i16> undef, i16 [[TMP1]], i32 1
; CHECK-NEXT:    ret <3 x i16> [[TRUNC]]
;
  %vec = insertelement <3 x i32> undef, i32 %x, i32 1
  %trunc = trunc <3 x i32> %vec to <3 x i16>
  ret <3 x i16> %trunc
}

; Hoist a trunc to a scalar if we're inserting into an undef vector.
; fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index

define <2 x float> @fptrunc_inselt_undef(double %x, i32 %index) {
; CHECK-LABEL: @fptrunc_inselt_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc double [[X:%.*]] to float
; CHECK-NEXT:    [[TRUNC:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 [[INDEX:%.*]]
; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
;
  %vec = insertelement <2 x double> <double undef, double undef>, double %x, i32 %index
  %trunc = fptrunc <2 x double> %vec to <2 x float>
  ret <2 x float> %trunc
}

; TODO: Strengthen the backend, so we can have this canonicalization.
; Insert a scalar int into a constant vector and truncate:
; trunc (inselt C, X, Index) --> inselt C, (trunc X), Index

define <3 x i16> @trunc_inselt1(i32 %x) {
; CHECK-LABEL: @trunc_inselt1(
; CHECK-NEXT:    [[VEC:%.*]] = insertelement <3 x i32> <i32 3, i32 undef, i32 65536>, i32 [[X:%.*]], i32 1
; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <3 x i32> [[VEC]] to <3 x i16>
; CHECK-NEXT:    ret <3 x i16> [[TRUNC]]
;
  %vec = insertelement <3 x i32> <i32 3, i32 -2, i32 65536>, i32 %x, i32 1
  %trunc = trunc <3 x i32> %vec to <3 x i16>
  ret <3 x i16> %trunc
}

; TODO: Strengthen the backend, so we can have this canonicalization.
; Insert a scalar FP into a constant vector and FP truncate:
; fptrunc (inselt C, X, Index) --> inselt C, (fptrunc X), Index

define <2 x float> @fptrunc_inselt1(double %x, i32 %index) {
; CHECK-LABEL: @fptrunc_inselt1(
; CHECK-NEXT:    [[VEC:%.*]] = insertelement <2 x double> <double undef, double 3.000000e+00>, double [[X:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT:    [[TRUNC:%.*]] = fptrunc <2 x double> [[VEC]] to <2 x float>
; CHECK-NEXT:    ret <2 x float> [[TRUNC]]
;
  %vec = insertelement <2 x double> <double undef, double 3.0>, double %x, i32 %index
  %trunc = fptrunc <2 x double> %vec to <2 x float>
  ret <2 x float> %trunc
}

; TODO: Strengthen the backend, so we can have this canonicalization.
; Insert a scalar int constant into a vector and truncate:
; trunc (inselt X, C, Index) --> inselt (trunc X), C', Index

define <8 x i16> @trunc_inselt2(<8 x i32> %x, i32 %index) {
; CHECK-LABEL: @trunc_inselt2(
; CHECK-NEXT:    [[VEC:%.*]] = insertelement <8 x i32> [[X:%.*]], i32 1048576, i32 [[INDEX:%.*]]
; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <8 x i32> [[VEC]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TRUNC]]
;
  %vec = insertelement <8 x i32> %x, i32 1048576, i32 %index
  %trunc = trunc <8 x i32> %vec to <8 x i16>
  ret <8 x i16> %trunc
}

; TODO: Strengthen the backend, so we can have this canonicalization.
; Insert a scalar FP constant into a vector and FP truncate:
; fptrunc (inselt X, C, Index) --> inselt (fptrunc X), C', Index

define <3 x float> @fptrunc_inselt2(<3 x double> %x) {
; CHECK-LABEL: @fptrunc_inselt2(
; CHECK-NEXT:    [[VEC:%.*]] = insertelement <3 x double> [[X:%.*]], double 4.000000e+00, i32 2
; CHECK-NEXT:    [[TRUNC:%.*]] = fptrunc <3 x double> [[VEC]] to <3 x float>
; CHECK-NEXT:    ret <3 x float> [[TRUNC]]
;
  %vec = insertelement <3 x double> %x, double 4.0, i32 2
  %trunc = fptrunc <3 x double> %vec to <3 x float>
  ret <3 x float> %trunc
}

; Converting to a wide type might reduce instruction count,
; but we can not do that unless the backend can recover from
; the creation of a potentially illegal op (like a 64-bit vmul).
; PR40032 - https://bugs.llvm.org/show_bug.cgi?id=40032

define <2 x i64> @sext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @sext_less_casting_with_wideop(
; CHECK-NEXT:    [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
; CHECK-NEXT:    [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
; CHECK-NEXT:    [[R:%.*]] = sext <2 x i32> [[MUL]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[R]]
;
  %xnarrow = trunc <2 x i64> %x to <2 x i32>
  %ynarrow = trunc <2 x i64> %y to <2 x i32>
  %mul = mul <2 x i32> %xnarrow, %ynarrow
  %r = sext <2 x i32> %mul to <2 x i64>
  ret <2 x i64> %r
}

define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @zext_less_casting_with_wideop(
; CHECK-NEXT:    [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
; CHECK-NEXT:    [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
; CHECK-NEXT:    [[R:%.*]] = zext <2 x i32> [[MUL]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[R]]
;
  %xnarrow = trunc <2 x i64> %x to <2 x i32>
  %ynarrow = trunc <2 x i64> %y to <2 x i32>
  %mul = mul <2 x i32> %xnarrow, %ynarrow
  %r = zext <2 x i32> %mul to <2 x i64>
  ret <2 x i64> %r
}