; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=X64

; Check that a sign/zero extension of a vector whose elements are all
; constants or undef is folded by the backend. Every result is known at
; compile time, so each function below is expected to lower to a single load
; of the already-extended vector from the constant pool.

; sext <4 x i8> {0, -1, 2, -3} to <4 x i16>.
define <4 x i16> @test_sext_4i8_4i16() {
; X32-LABEL: test_sext_4i8_4i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,65535,2,65533,u,u,u,u>
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = sext <4 x i8> %vec3 to <4 x i16>
  ret <4 x i16> %ext
}

; sext <4 x i8> {undef, -1, undef, -3} to <4 x i16>; the undef source lanes
; are expected to stay undef ('u') in the folded constant.
define <4 x i16> @test_sext_4i8_4i16_undef() {
; X32-LABEL: test_sext_4i8_4i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,u,u,u>
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 undef, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 undef, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = sext <4 x i8> %vec3 to <4 x i16>
  ret <4 x i16> %ext
}

; sext <4 x i8> {0, -1, 2, -3} to <4 x i32>.
define <4 x i32> @test_sext_4i8_4i32() {
; X32-LABEL: test_sext_4i8_4i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,2,4294967293]
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = sext <4 x i8> %vec3 to <4 x i32>
  ret <4 x i32> %ext
}

; sext <4 x i8> {undef, -1, undef, -3} to <4 x i32>; undef lanes stay 'u'.
define <4 x i32> @test_sext_4i8_4i32_undef() {
; X32-LABEL: test_sext_4i8_4i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,4294967295,u,4294967293>
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 undef, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 undef, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = sext <4 x i8> %vec3 to <4 x i32>
  ret <4 x i32> %ext
}

; sext <4 x i8> {0, -1, 2, -3} to <4 x i64>. The i686 check prints the
; constant as eight 32-bit values, the x86_64 check as four 64-bit values.
define <4 x i64> @test_sext_4i8_4i64() {
; X32-LABEL: test_sext_4i8_4i64:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,2,0,4294967293,4294967295]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,2,18446744073709551613]
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = sext <4 x i8> %vec3 to <4 x i64>
  ret <4 x i64> %ext
}

; sext <4 x i8> {undef, -1, undef, -3} to <4 x i64>; undef lanes stay 'u'.
define <4 x i64> @test_sext_4i8_4i64_undef() {
; X32-LABEL: test_sext_4i8_4i64_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <u,u,4294967295,4294967295,u,u,4294967293,4294967295>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_4i8_4i64_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <u,18446744073709551615,u,18446744073709551613>
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 undef, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 undef, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = sext <4 x i8> %vec3 to <4 x i64>
  ret <4 x i64> %ext
}

; sext <8 x i8> {0, -1, 2, -3, 4, -5, 6, -7} to <8 x i16>.
define <8 x i16> @test_sext_8i8_8i16() {
; X32-LABEL: test_sext_8i8_8i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,2,65533,4,65531,6,65529]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,2,65533,4,65531,6,65529]
; X64-NEXT:    retq
  %vec0 = insertelement <8 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <8 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <8 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <8 x i8> %vec2, i8 -3, i32 3
  %vec4 = insertelement <8 x i8> %vec3, i8 4, i32 4
  %vec5 = insertelement <8 x i8> %vec4, i8 -5, i32 5
  %vec6 = insertelement <8 x i8> %vec5, i8 6, i32 6
  %vec7 = insertelement <8 x i8> %vec6, i8 -7, i32 7
  %ext = sext <8 x i8> %vec7 to <8 x i16>
  ret <8 x i16> %ext
}

; sext <8 x i8> {0, -1, 2, -3, 4, -5, 6, -7} to <8 x i32>.
define <8 x i32> @test_sext_8i8_8i32() {
; X32-LABEL: test_sext_8i8_8i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,2,4294967293,4,4294967291,6,4294967289]
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,2,4294967293,4,4294967291,6,4294967289]
; X64-NEXT:    retq
  %vec0 = insertelement <8 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <8 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <8 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <8 x i8> %vec2, i8 -3, i32 3
  %vec4 = insertelement <8 x i8> %vec3, i8 4, i32 4
  %vec5 = insertelement <8 x i8> %vec4, i8 -5, i32 5
  %vec6 = insertelement <8 x i8> %vec5, i8 6, i32 6
  %vec7 = insertelement <8 x i8> %vec6, i8 -7, i32 7
  %ext = sext <8 x i8> %vec7 to <8 x i32>
  ret <8 x i32> %ext
}

; sext <8 x i8> with undef in the even lanes and {-1, -3, -5, -7} in the odd
; lanes, to <8 x i16>; undef lanes stay 'u'.
define <8 x i16> @test_sext_8i8_8i16_undef() {
; X32-LABEL: test_sext_8i8_8i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,65531,u,65529>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <u,65535,u,65533,u,65531,u,65529>
; X64-NEXT:    retq
  %vec0 = insertelement <8 x i8> undef, i8 undef, i32 0
  %vec1 = insertelement <8 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <8 x i8> %vec1, i8 undef, i32 2
  %vec3 = insertelement <8 x i8> %vec2, i8 -3, i32 3
  %vec4 = insertelement <8 x i8> %vec3, i8 undef, i32 4
  %vec5 = insertelement <8 x i8> %vec4, i8 -5, i32 5
  %vec6 = insertelement <8 x i8> %vec5, i8 undef, i32 6
  %vec7 = insertelement <8 x i8> %vec6, i8 -7, i32 7
  %ext = sext <8 x i8> %vec7 to <8 x i16>
  ret <8 x i16> %ext
}

; sext <8 x i8> with {0, 2, 4, 6} in the even lanes and undef in the odd
; lanes, to <8 x i32>; undef lanes stay 'u'.
define <8 x i32> @test_sext_8i8_8i32_undef() {
; X32-LABEL: test_sext_8i8_8i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,u,4,u,6,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_sext_8i8_8i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = <0,u,2,u,4,u,6,u>
; X64-NEXT:    retq
  %vec0 = insertelement <8 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <8 x i8> %vec0, i8 undef, i32 1
  %vec2 = insertelement <8 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <8 x i8> %vec2, i8 undef, i32 3
  %vec4 = insertelement <8 x i8> %vec3, i8 4, i32 4
  %vec5 = insertelement <8 x i8> %vec4, i8 undef, i32 5
  %vec6 = insertelement <8 x i8> %vec5, i8 6, i32 6
  %vec7 = insertelement <8 x i8> %vec6, i8 undef, i32 7
  %ext = sext <8 x i8> %vec7 to <8 x i32>
  ret <8 x i32> %ext
}

; zext <4 x i8> {0, -1, 2, -3} to <4 x i16>.
define <4 x i16> @test_zext_4i8_4i16() {
; X32-LABEL: test_zext_4i8_4i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,255,2,253,u,u,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,255,2,253,u,u,u,u>
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = zext <4 x i8> %vec3 to <4 x i16>
  ret <4 x i16> %ext
}

; zext <4 x i8> {0, -1, 2, -3} to <4 x i32>.
define <4 x i32> @test_zext_4i8_4i32() {
; X32-LABEL: test_zext_4i8_4i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253]
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = zext <4 x i8> %vec3 to <4 x i32>
  ret <4 x i32> %ext
}

; zext <4 x i8> {0, -1, 2, -3} to <4 x i64>. The i686 check prints the
; constant as eight 32-bit values, the x86_64 check as four 64-bit values.
define <4 x i64> @test_zext_4i8_4i64() {
; X32-LABEL: test_zext_4i8_4i64:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,253,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i64:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253]
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = zext <4 x i8> %vec3 to <4 x i64>
  ret <4 x i64> %ext
}

; zext <4 x i8> {undef, -1, undef, -3} to <4 x i16>; the checks expect the
; undef source lanes to show up as 0 in the folded constant.
define <4 x i16> @test_zext_4i8_4i16_undef() {
; X32-LABEL: test_zext_4i8_4i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = <0,255,0,253,u,u,u,u>
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = <0,255,0,253,u,u,u,u>
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 undef, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 undef, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 -3, i32 3
  %ext = zext <4 x i8> %vec3 to <4 x i16>
  ret <4 x i16> %ext
}

; zext <4 x i8> {0, undef, 2, undef} to <4 x i32>; undef lanes are expected
; as 0.
define <4 x i32> @test_zext_4i8_4i32_undef() {
; X32-LABEL: test_zext_4i8_4i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,2,0]
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 undef, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 undef, i32 3
  %ext = zext <4 x i8> %vec3 to <4 x i32>
  ret <4 x i32> %ext
}

; zext <4 x i8> {undef, -1, 2, undef} to <4 x i64>; undef lanes are expected
; as 0.
define <4 x i64> @test_zext_4i8_4i64_undef() {
; X32-LABEL: test_zext_4i8_4i64_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_4i8_4i64_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,0]
; X64-NEXT:    retq
  %vec0 = insertelement <4 x i8> undef, i8 undef, i32 0
  %vec1 = insertelement <4 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <4 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <4 x i8> %vec2, i8 undef, i32 3
  %ext = zext <4 x i8> %vec3 to <4 x i64>
  ret <4 x i64> %ext
}

; zext <8 x i8> {0, -1, 2, -3, 4, -5, 6, -7} to <8 x i16>.
define <8 x i16> @test_zext_8i8_8i16() {
; X32-LABEL: test_zext_8i8_8i16:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i16:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,2,253,4,251,6,249]
; X64-NEXT:    retq
  %vec0 = insertelement <8 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <8 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <8 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <8 x i8> %vec2, i8 -3, i32 3
  %vec4 = insertelement <8 x i8> %vec3, i8 4, i32 4
  %vec5 = insertelement <8 x i8> %vec4, i8 -5, i32 5
  %vec6 = insertelement <8 x i8> %vec5, i8 6, i32 6
  %vec7 = insertelement <8 x i8> %vec6, i8 -7, i32 7
  %ext = zext <8 x i8> %vec7 to <8 x i16>
  ret <8 x i16> %ext
}

; zext <8 x i8> {0, -1, 2, -3, 4, -5, 6, -7} to <8 x i32>.
define <8 x i32> @test_zext_8i8_8i32() {
; X32-LABEL: test_zext_8i8_8i32:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i32:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,2,253,4,251,6,249]
; X64-NEXT:    retq
  %vec0 = insertelement <8 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <8 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <8 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <8 x i8> %vec2, i8 -3, i32 3
  %vec4 = insertelement <8 x i8> %vec3, i8 4, i32 4
  %vec5 = insertelement <8 x i8> %vec4, i8 -5, i32 5
  %vec6 = insertelement <8 x i8> %vec5, i8 6, i32 6
  %vec7 = insertelement <8 x i8> %vec6, i8 -7, i32 7
  %ext = zext <8 x i8> %vec7 to <8 x i32>
  ret <8 x i32> %ext
}

; zext <8 x i8> with undef in the even lanes and {-1, -3, -5, -7} in the odd
; lanes, to <8 x i16>; undef lanes are expected as 0.
define <8 x i16> @test_zext_8i8_8i16_undef() {
; X32-LABEL: test_zext_8i8_8i16_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i16_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249]
; X64-NEXT:    retq
  %vec0 = insertelement <8 x i8> undef, i8 undef, i32 0
  %vec1 = insertelement <8 x i8> %vec0, i8 -1, i32 1
  %vec2 = insertelement <8 x i8> %vec1, i8 undef, i32 2
  %vec3 = insertelement <8 x i8> %vec2, i8 -3, i32 3
  %vec4 = insertelement <8 x i8> %vec3, i8 undef, i32 4
  %vec5 = insertelement <8 x i8> %vec4, i8 -5, i32 5
  %vec6 = insertelement <8 x i8> %vec5, i8 undef, i32 6
  %vec7 = insertelement <8 x i8> %vec6, i8 -7, i32 7
  %ext = zext <8 x i8> %vec7 to <8 x i16>
  ret <8 x i16> %ext
}

; zext <8 x i8> {0, undef, 2, -3, 4, undef, 6, undef} to <8 x i32>; undef
; lanes are expected as 0.
define <8 x i32> @test_zext_8i8_8i32_undef() {
; X32-LABEL: test_zext_8i8_8i32_undef:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_zext_8i8_8i32_undef:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0]
; X64-NEXT:    retq
  %vec0 = insertelement <8 x i8> undef, i8 0, i32 0
  %vec1 = insertelement <8 x i8> %vec0, i8 undef, i32 1
  %vec2 = insertelement <8 x i8> %vec1, i8 2, i32 2
  %vec3 = insertelement <8 x i8> %vec2, i8 -3, i32 3
  %vec4 = insertelement <8 x i8> %vec3, i8 4, i32 4
  %vec5 = insertelement <8 x i8> %vec4, i8 undef, i32 5
  %vec6 = insertelement <8 x i8> %vec5, i8 6, i32 6
  %vec7 = insertelement <8 x i8> %vec6, i8 undef, i32 7
  %ext = zext <8 x i8> %vec7 to <8 x i32>
  ret <8 x i32> %ext
}