1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=ALL --check-prefix=X32 3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=ALL --check-prefix=X64 4 5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx-builtins.c 6 7define <4 x double> @test_mm256_add_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 8; X32-LABEL: test_mm256_add_pd: 9; X32: # BB#0: 10; X32-NEXT: vaddpd %ymm1, %ymm0, %ymm0 11; X32-NEXT: retl 12; 13; X64-LABEL: test_mm256_add_pd: 14; X64: # BB#0: 15; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 16; X64-NEXT: retq 17 %res = fadd <4 x double> %a0, %a1 18 ret <4 x double> %res 19} 20 21define <8 x float> @test_mm256_add_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 22; X32-LABEL: test_mm256_add_ps: 23; X32: # BB#0: 24; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0 25; X32-NEXT: retl 26; 27; X64-LABEL: test_mm256_add_ps: 28; X64: # BB#0: 29; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 30; X64-NEXT: retq 31 %res = fadd <8 x float> %a0, %a1 32 ret <8 x float> %res 33} 34 35define <4 x double> @test_mm256_addsub_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 36; X32-LABEL: test_mm256_addsub_pd: 37; X32: # BB#0: 38; X32-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 39; X32-NEXT: retl 40; 41; X64-LABEL: test_mm256_addsub_pd: 42; X64: # BB#0: 43; X64-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 44; X64-NEXT: retq 45 %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) 46 ret <4 x double> %res 47} 48declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 49 50define <8 x float> @test_mm256_addsub_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 51; X32-LABEL: test_mm256_addsub_ps: 52; X32: # BB#0: 53; X32-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 54; X32-NEXT: retl 55; 56; X64-LABEL: 
test_mm256_addsub_ps: 57; X64: # BB#0: 58; X64-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 59; X64-NEXT: retq 60 %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) 61 ret <8 x float> %res 62} 63declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 64 65define <4 x double> @test_mm256_and_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 66; X32-LABEL: test_mm256_and_pd: 67; X32: # BB#0: 68; X32-NEXT: vandps %ymm1, %ymm0, %ymm0 69; X32-NEXT: retl 70; 71; X64-LABEL: test_mm256_and_pd: 72; X64: # BB#0: 73; X64-NEXT: vandps %ymm1, %ymm0, %ymm0 74; X64-NEXT: retq 75 %1 = bitcast <4 x double> %a0 to <4 x i64> 76 %2 = bitcast <4 x double> %a1 to <4 x i64> 77 %res = and <4 x i64> %1, %2 78 %bc = bitcast <4 x i64> %res to <4 x double> 79 ret <4 x double> %bc 80} 81 82define <8 x float> @test_mm256_and_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 83; X32-LABEL: test_mm256_and_ps: 84; X32: # BB#0: 85; X32-NEXT: vandps %ymm1, %ymm0, %ymm0 86; X32-NEXT: retl 87; 88; X64-LABEL: test_mm256_and_ps: 89; X64: # BB#0: 90; X64-NEXT: vandps %ymm1, %ymm0, %ymm0 91; X64-NEXT: retq 92 %1 = bitcast <8 x float> %a0 to <8 x i32> 93 %2 = bitcast <8 x float> %a1 to <8 x i32> 94 %res = and <8 x i32> %1, %2 95 %bc = bitcast <8 x i32> %res to <8 x float> 96 ret <8 x float> %bc 97} 98 99define <4 x double> @test_mm256_andnot_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 100; X32-LABEL: test_mm256_andnot_pd: 101; X32: # BB#0: 102; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 103; X32-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 104; X32-NEXT: vxorps %ymm2, %ymm0, %ymm0 105; X32-NEXT: vandps %ymm1, %ymm0, %ymm0 106; X32-NEXT: retl 107; 108; X64-LABEL: test_mm256_andnot_pd: 109; X64: # BB#0: 110; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 111; X64-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 112; X64-NEXT: vxorps %ymm2, %ymm0, %ymm0 113; X64-NEXT: vandps %ymm1, %ymm0, %ymm0 114; X64-NEXT: retq 115 %1 = bitcast <4 x double> %a0 to <4 x i64> 116 
%2 = bitcast <4 x double> %a1 to <4 x i64> 117 %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1> 118 %res = and <4 x i64> %3, %2 119 %bc = bitcast <4 x i64> %res to <4 x double> 120 ret <4 x double> %bc 121} 122 123define <8 x float> @test_mm256_andnot_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 124; X32-LABEL: test_mm256_andnot_ps: 125; X32: # BB#0: 126; X32-NEXT: vandnps %ymm1, %ymm0, %ymm0 127; X32-NEXT: retl 128; 129; X64-LABEL: test_mm256_andnot_ps: 130; X64: # BB#0: 131; X64-NEXT: vandnps %ymm1, %ymm0, %ymm0 132; X64-NEXT: retq 133 %1 = bitcast <8 x float> %a0 to <8 x i32> 134 %2 = bitcast <8 x float> %a1 to <8 x i32> 135 %3 = xor <8 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 136 %res = and <8 x i32> %3, %2 137 %bc = bitcast <8 x i32> %res to <8 x float> 138 ret <8 x float> %bc 139} 140 141define <4 x double> @test_mm256_blend_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 142; X32-LABEL: test_mm256_blend_pd: 143; X32: # BB#0: 144; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] 145; X32-NEXT: retl 146; 147; X64-LABEL: test_mm256_blend_pd: 148; X64: # BB#0: 149; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] 150; X64-NEXT: retq 151 %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3> 152 ret <4 x double> %res 153} 154 155define <8 x float> @test_mm256_blend_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 156; X32-LABEL: test_mm256_blend_ps: 157; X32: # BB#0: 158; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4,5,6],ymm1[7] 159; X32-NEXT: retl 160; 161; X64-LABEL: test_mm256_blend_ps: 162; X64: # BB#0: 163; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4,5,6],ymm1[7] 164; X64-NEXT: retq 165 %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> 166 ret <8 x float> %res 167} 168 169define <4 x double> @test_mm256_blendv_pd(<4 x double> %a0, <4 x 
double> %a1, <4 x double> %a2) nounwind { 170; X32-LABEL: test_mm256_blendv_pd: 171; X32: # BB#0: 172; X32-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 173; X32-NEXT: retl 174; 175; X64-LABEL: test_mm256_blendv_pd: 176; X64: # BB#0: 177; X64-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 178; X64-NEXT: retq 179 %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) 180 ret <4 x double> %res 181} 182declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone 183 184define <8 x float> @test_mm256_blendv_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind { 185; X32-LABEL: test_mm256_blendv_ps: 186; X32: # BB#0: 187; X32-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 188; X32-NEXT: retl 189; 190; X64-LABEL: test_mm256_blendv_ps: 191; X64: # BB#0: 192; X64-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 193; X64-NEXT: retq 194 %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 195 ret <8 x float> %res 196} 197declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone 198 199define <4 x double> @test_mm256_broadcast_pd(<2 x double>* %a0) nounwind { 200; X32-LABEL: test_mm256_broadcast_pd: 201; X32: # BB#0: 202; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 203; X32-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] 204; X32-NEXT: retl 205; 206; X64-LABEL: test_mm256_broadcast_pd: 207; X64: # BB#0: 208; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] 209; X64-NEXT: retq 210 %arg0 = bitcast <2 x double>* %a0 to i8* 211 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %arg0) 212 ret <4 x double> %res 213} 214declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly 215 216define <8 x float> @test_mm256_broadcast_ps(<4 x float>* %a0) nounwind { 217; X32-LABEL: test_mm256_broadcast_ps: 218; X32: # BB#0: 219; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 
220; X32-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] 221; X32-NEXT: retl 222; 223; X64-LABEL: test_mm256_broadcast_ps: 224; X64: # BB#0: 225; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] 226; X64-NEXT: retq 227 %arg0 = bitcast <4 x float>* %a0 to i8* 228 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %arg0) 229 ret <8 x float> %res 230} 231declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly 232 233define <4 x double> @test_mm256_broadcast_sd(double* %a0) nounwind { 234; X32-LABEL: test_mm256_broadcast_sd: 235; X32: # BB#0: 236; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 237; X32-NEXT: vbroadcastsd (%eax), %ymm0 238; X32-NEXT: retl 239; 240; X64-LABEL: test_mm256_broadcast_sd: 241; X64: # BB#0: 242; X64-NEXT: vbroadcastsd (%rdi), %ymm0 243; X64-NEXT: retq 244 %ld = load double, double* %a0 245 %ins0 = insertelement <4 x double> undef, double %ld, i32 0 246 %ins1 = insertelement <4 x double> %ins0, double %ld, i32 1 247 %ins2 = insertelement <4 x double> %ins1, double %ld, i32 2 248 %ins3 = insertelement <4 x double> %ins2, double %ld, i32 3 249 ret <4 x double> %ins3 250} 251 252define <4 x float> @test_mm_broadcast_ss(float* %a0) nounwind { 253; X32-LABEL: test_mm_broadcast_ss: 254; X32: # BB#0: 255; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 256; X32-NEXT: vbroadcastss (%eax), %xmm0 257; X32-NEXT: retl 258; 259; X64-LABEL: test_mm_broadcast_ss: 260; X64: # BB#0: 261; X64-NEXT: vbroadcastss (%rdi), %xmm0 262; X64-NEXT: retq 263 %ld = load float, float* %a0 264 %ins0 = insertelement <4 x float> undef, float %ld, i32 0 265 %ins1 = insertelement <4 x float> %ins0, float %ld, i32 1 266 %ins2 = insertelement <4 x float> %ins1, float %ld, i32 2 267 %ins3 = insertelement <4 x float> %ins2, float %ld, i32 3 268 ret <4 x float> %ins3 269} 270 271define <8 x float> @test_mm256_broadcast_ss(float* %a0) nounwind { 272; X32-LABEL: test_mm256_broadcast_ss: 273; X32: # BB#0: 274; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 275; X32-NEXT: 
vbroadcastss (%eax), %ymm0 276; X32-NEXT: retl 277; 278; X64-LABEL: test_mm256_broadcast_ss: 279; X64: # BB#0: 280; X64-NEXT: vbroadcastss (%rdi), %ymm0 281; X64-NEXT: retq 282 %ld = load float, float* %a0 283 %ins0 = insertelement <8 x float> undef, float %ld, i32 0 284 %ins1 = insertelement <8 x float> %ins0, float %ld, i32 1 285 %ins2 = insertelement <8 x float> %ins1, float %ld, i32 2 286 %ins3 = insertelement <8 x float> %ins2, float %ld, i32 3 287 %ins4 = insertelement <8 x float> %ins3, float %ld, i32 4 288 %ins5 = insertelement <8 x float> %ins4, float %ld, i32 5 289 %ins6 = insertelement <8 x float> %ins5, float %ld, i32 6 290 %ins7 = insertelement <8 x float> %ins6, float %ld, i32 7 291 ret <8 x float> %ins7 292} 293 294define <8 x float> @test_mm256_castpd_ps(<4 x double> %a0) nounwind { 295; X32-LABEL: test_mm256_castpd_ps: 296; X32: # BB#0: 297; X32-NEXT: retl 298; 299; X64-LABEL: test_mm256_castpd_ps: 300; X64: # BB#0: 301; X64-NEXT: retq 302 %res = bitcast <4 x double> %a0 to <8 x float> 303 ret <8 x float> %res 304} 305 306define <4 x i64> @test_mm256_castpd_si256(<4 x double> %a0) nounwind { 307; X32-LABEL: test_mm256_castpd_si256: 308; X32: # BB#0: 309; X32-NEXT: retl 310; 311; X64-LABEL: test_mm256_castpd_si256: 312; X64: # BB#0: 313; X64-NEXT: retq 314 %res = bitcast <4 x double> %a0 to <4 x i64> 315 ret <4 x i64> %res 316} 317 318define <4 x double> @test_mm256_castpd128_pd256(<2 x double> %a0) nounwind { 319; X32-LABEL: test_mm256_castpd128_pd256: 320; X32: # BB#0: 321; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 322; X32-NEXT: retl 323; 324; X64-LABEL: test_mm256_castpd128_pd256: 325; X64: # BB#0: 326; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 327; X64-NEXT: retq 328 %res = shufflevector <2 x double> %a0, <2 x double> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 329 ret <4 x double> %res 330} 331 332define <2 x double> @test_mm256_castpd256_pd128(<4 x double> %a0) nounwind { 333; X32-LABEL: test_mm256_castpd256_pd128: 
334; X32: # BB#0: 335; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 336; X32-NEXT: vzeroupper 337; X32-NEXT: retl 338; 339; X64-LABEL: test_mm256_castpd256_pd128: 340; X64: # BB#0: 341; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 342; X64-NEXT: vzeroupper 343; X64-NEXT: retq 344 %res = shufflevector <4 x double> %a0, <4 x double> %a0, <2 x i32> <i32 0, i32 1> 345 ret <2 x double> %res 346} 347 348define <4 x double> @test_mm256_castps_pd(<8 x float> %a0) nounwind { 349; X32-LABEL: test_mm256_castps_pd: 350; X32: # BB#0: 351; X32-NEXT: retl 352; 353; X64-LABEL: test_mm256_castps_pd: 354; X64: # BB#0: 355; X64-NEXT: retq 356 %res = bitcast <8 x float> %a0 to <4 x double> 357 ret <4 x double> %res 358} 359 360define <4 x i64> @test_mm256_castps_si256(<8 x float> %a0) nounwind { 361; X32-LABEL: test_mm256_castps_si256: 362; X32: # BB#0: 363; X32-NEXT: retl 364; 365; X64-LABEL: test_mm256_castps_si256: 366; X64: # BB#0: 367; X64-NEXT: retq 368 %res = bitcast <8 x float> %a0 to <4 x i64> 369 ret <4 x i64> %res 370} 371 372define <8 x float> @test_mm256_castps128_ps256(<4 x float> %a0) nounwind { 373; X32-LABEL: test_mm256_castps128_ps256: 374; X32: # BB#0: 375; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 376; X32-NEXT: retl 377; 378; X64-LABEL: test_mm256_castps128_ps256: 379; X64: # BB#0: 380; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 381; X64-NEXT: retq 382 %res = shufflevector <4 x float> %a0, <4 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 383 ret <8 x float> %res 384} 385 386define <4 x float> @test_mm256_castps256_ps128(<8 x float> %a0) nounwind { 387; X32-LABEL: test_mm256_castps256_ps128: 388; X32: # BB#0: 389; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 390; X32-NEXT: vzeroupper 391; X32-NEXT: retl 392; 393; X64-LABEL: test_mm256_castps256_ps128: 394; X64: # BB#0: 395; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 396; X64-NEXT: vzeroupper 397; X64-NEXT: retq 
398 %res = shufflevector <8 x float> %a0, <8 x float> %a0, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 399 ret <4 x float> %res 400} 401 402define <4 x i64> @test_mm256_castsi128_si256(<2 x i64> %a0) nounwind { 403; X32-LABEL: test_mm256_castsi128_si256: 404; X32: # BB#0: 405; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 406; X32-NEXT: retl 407; 408; X64-LABEL: test_mm256_castsi128_si256: 409; X64: # BB#0: 410; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 411; X64-NEXT: retq 412 %res = shufflevector <2 x i64> %a0, <2 x i64> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 413 ret <4 x i64> %res 414} 415 416define <4 x double> @test_mm256_castsi256_pd(<4 x i64> %a0) nounwind { 417; X32-LABEL: test_mm256_castsi256_pd: 418; X32: # BB#0: 419; X32-NEXT: retl 420; 421; X64-LABEL: test_mm256_castsi256_pd: 422; X64: # BB#0: 423; X64-NEXT: retq 424 %res = bitcast <4 x i64> %a0 to <4 x double> 425 ret <4 x double> %res 426} 427 428define <8 x float> @test_mm256_castsi256_ps(<4 x i64> %a0) nounwind { 429; X32-LABEL: test_mm256_castsi256_ps: 430; X32: # BB#0: 431; X32-NEXT: retl 432; 433; X64-LABEL: test_mm256_castsi256_ps: 434; X64: # BB#0: 435; X64-NEXT: retq 436 %res = bitcast <4 x i64> %a0 to <8 x float> 437 ret <8 x float> %res 438} 439 440define <2 x i64> @test_mm256_castsi256_si128(<4 x i64> %a0) nounwind { 441; X32-LABEL: test_mm256_castsi256_si128: 442; X32: # BB#0: 443; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 444; X32-NEXT: vzeroupper 445; X32-NEXT: retl 446; 447; X64-LABEL: test_mm256_castsi256_si128: 448; X64: # BB#0: 449; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 450; X64-NEXT: vzeroupper 451; X64-NEXT: retq 452 %res = shufflevector <4 x i64> %a0, <4 x i64> %a0, <2 x i32> <i32 0, i32 1> 453 ret <2 x i64> %res 454} 455 456define <4 x double> @test_mm256_ceil_pd(<4 x double> %a0) nounwind { 457; X32-LABEL: test_mm256_ceil_pd: 458; X32: # BB#0: 459; X32-NEXT: vroundpd $2, %ymm0, %ymm0 460; X32-NEXT: retl 461; 462; X64-LABEL: 
test_mm256_ceil_pd: 463; X64: # BB#0: 464; X64-NEXT: vroundpd $2, %ymm0, %ymm0 465; X64-NEXT: retq 466 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 2) 467 ret <4 x double> %res 468} 469declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone 470 471define <8 x float> @test_mm256_ceil_ps(<8 x float> %a0) nounwind { 472; X32-LABEL: test_mm256_ceil_ps: 473; X32: # BB#0: 474; X32-NEXT: vroundps $2, %ymm0, %ymm0 475; X32-NEXT: retl 476; 477; X64-LABEL: test_mm256_ceil_ps: 478; X64: # BB#0: 479; X64-NEXT: vroundps $2, %ymm0, %ymm0 480; X64-NEXT: retq 481 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 2) 482 ret <8 x float> %res 483} 484declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone 485 486define <2 x double> @test_mm_cmp_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 487; X32-LABEL: test_mm_cmp_pd: 488; X32: # BB#0: 489; X32-NEXT: vcmpgepd %xmm1, %xmm0, %xmm0 490; X32-NEXT: retl 491; 492; X64-LABEL: test_mm_cmp_pd: 493; X64: # BB#0: 494; X64-NEXT: vcmpgepd %xmm1, %xmm0, %xmm0 495; X64-NEXT: retq 496 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 13) 497 ret <2 x double> %res 498} 499declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone 500 501define <4 x double> @test_mm256_cmp_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 502; X32-LABEL: test_mm256_cmp_pd: 503; X32: # BB#0: 504; X32-NEXT: vcmpgepd %ymm1, %ymm0, %ymm0 505; X32-NEXT: retl 506; 507; X64-LABEL: test_mm256_cmp_pd: 508; X64: # BB#0: 509; X64-NEXT: vcmpgepd %ymm1, %ymm0, %ymm0 510; X64-NEXT: retq 511 %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 13) 512 ret <4 x double> %res 513} 514declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 515 516define <4 x float> @test_mm_cmp_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 517; 
X32-LABEL: test_mm_cmp_ps: 518; X32: # BB#0: 519; X32-NEXT: vcmpgeps %xmm1, %xmm0, %xmm0 520; X32-NEXT: retl 521; 522; X64-LABEL: test_mm_cmp_ps: 523; X64: # BB#0: 524; X64-NEXT: vcmpgeps %xmm1, %xmm0, %xmm0 525; X64-NEXT: retq 526 %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 13) 527 ret <4 x float> %res 528} 529declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone 530 531define <8 x float> @test_mm256_cmp_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 532; X32-LABEL: test_mm256_cmp_ps: 533; X32: # BB#0: 534; X32-NEXT: vcmpgeps %ymm1, %ymm0, %ymm0 535; X32-NEXT: retl 536; 537; X64-LABEL: test_mm256_cmp_ps: 538; X64: # BB#0: 539; X64-NEXT: vcmpgeps %ymm1, %ymm0, %ymm0 540; X64-NEXT: retq 541 %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 13) 542 ret <8 x float> %res 543} 544declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 545 546define <2 x double> @test_mm_cmp_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 547; X32-LABEL: test_mm_cmp_sd: 548; X32: # BB#0: 549; X32-NEXT: vcmpgesd %xmm1, %xmm0, %xmm0 550; X32-NEXT: retl 551; 552; X64-LABEL: test_mm_cmp_sd: 553; X64: # BB#0: 554; X64-NEXT: vcmpgesd %xmm1, %xmm0, %xmm0 555; X64-NEXT: retq 556 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 13) 557 ret <2 x double> %res 558} 559declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone 560 561define <4 x float> @test_mm_cmp_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 562; X32-LABEL: test_mm_cmp_ss: 563; X32: # BB#0: 564; X32-NEXT: vcmpgess %xmm1, %xmm0, %xmm0 565; X32-NEXT: retl 566; 567; X64-LABEL: test_mm_cmp_ss: 568; X64: # BB#0: 569; X64-NEXT: vcmpgess %xmm1, %xmm0, %xmm0 570; X64-NEXT: retq 571 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 13) 572 ret <4 x float> %res 573} 574declare <4 x float> 
@llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone 575 576define <4 x double> @test_mm256_cvtepi32_pd(<2 x i64> %a0) nounwind { 577; X32-LABEL: test_mm256_cvtepi32_pd: 578; X32: # BB#0: 579; X32-NEXT: vcvtdq2pd %xmm0, %ymm0 580; X32-NEXT: retl 581; 582; X64-LABEL: test_mm256_cvtepi32_pd: 583; X64: # BB#0: 584; X64-NEXT: vcvtdq2pd %xmm0, %ymm0 585; X64-NEXT: retq 586 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 587 %res = sitofp <4 x i32> %arg0 to <4 x double> 588 ret <4 x double> %res 589} 590 591define <8 x float> @test_mm256_cvtepi32_ps(<4 x i64> %a0) nounwind { 592; X32-LABEL: test_mm256_cvtepi32_ps: 593; X32: # BB#0: 594; X32-NEXT: vcvtdq2ps %ymm0, %ymm0 595; X32-NEXT: retl 596; 597; X64-LABEL: test_mm256_cvtepi32_ps: 598; X64: # BB#0: 599; X64-NEXT: vcvtdq2ps %ymm0, %ymm0 600; X64-NEXT: retq 601 %arg0 = bitcast <4 x i64> %a0 to <8 x i32> 602 %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %arg0) 603 ret <8 x float> %res 604} 605declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone 606 607define <2 x i64> @test_mm256_cvtpd_epi32(<4 x double> %a0) nounwind { 608; X32-LABEL: test_mm256_cvtpd_epi32: 609; X32: # BB#0: 610; X32-NEXT: vcvtpd2dqy %ymm0, %xmm0 611; X32-NEXT: vzeroupper 612; X32-NEXT: retl 613; 614; X64-LABEL: test_mm256_cvtpd_epi32: 615; X64: # BB#0: 616; X64-NEXT: vcvtpd2dqy %ymm0, %xmm0 617; X64-NEXT: vzeroupper 618; X64-NEXT: retq 619 %cvt = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) 620 %res = bitcast <4 x i32> %cvt to <2 x i64> 621 ret <2 x i64> %res 622} 623declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone 624 625define <4 x float> @test_mm256_cvtpd_ps(<4 x double> %a0) nounwind { 626; X32-LABEL: test_mm256_cvtpd_ps: 627; X32: # BB#0: 628; X32-NEXT: vcvtpd2psy %ymm0, %xmm0 629; X32-NEXT: vzeroupper 630; X32-NEXT: retl 631; 632; X64-LABEL: test_mm256_cvtpd_ps: 633; X64: # BB#0: 634; X64-NEXT: vcvtpd2psy %ymm0, %xmm0 635; X64-NEXT: vzeroupper 636; 
X64-NEXT: retq 637 %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) 638 ret <4 x float> %res 639} 640declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone 641 642define <4 x i64> @test_mm256_cvtps_epi32(<8 x float> %a0) nounwind { 643; X32-LABEL: test_mm256_cvtps_epi32: 644; X32: # BB#0: 645; X32-NEXT: vcvtps2dq %ymm0, %ymm0 646; X32-NEXT: retl 647; 648; X64-LABEL: test_mm256_cvtps_epi32: 649; X64: # BB#0: 650; X64-NEXT: vcvtps2dq %ymm0, %ymm0 651; X64-NEXT: retq 652 %cvt = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) 653 %res = bitcast <8 x i32> %cvt to <4 x i64> 654 ret <4 x i64> %res 655} 656declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone 657 658define <4 x double> @test_mm256_cvtps_pd(<4 x float> %a0) nounwind { 659; X32-LABEL: test_mm256_cvtps_pd: 660; X32: # BB#0: 661; X32-NEXT: vcvtps2pd %xmm0, %ymm0 662; X32-NEXT: retl 663; 664; X64-LABEL: test_mm256_cvtps_pd: 665; X64: # BB#0: 666; X64-NEXT: vcvtps2pd %xmm0, %ymm0 667; X64-NEXT: retq 668 %res = fpext <4 x float> %a0 to <4 x double> 669 ret <4 x double> %res 670} 671 672define <2 x i64> @test_mm256_cvttpd_epi32(<4 x double> %a0) nounwind { 673; X32-LABEL: test_mm256_cvttpd_epi32: 674; X32: # BB#0: 675; X32-NEXT: vcvttpd2dqy %ymm0, %xmm0 676; X32-NEXT: vzeroupper 677; X32-NEXT: retl 678; 679; X64-LABEL: test_mm256_cvttpd_epi32: 680; X64: # BB#0: 681; X64-NEXT: vcvttpd2dqy %ymm0, %xmm0 682; X64-NEXT: vzeroupper 683; X64-NEXT: retq 684 %cvt = fptosi <4 x double> %a0 to <4 x i32> 685 %res = bitcast <4 x i32> %cvt to <2 x i64> 686 ret <2 x i64> %res 687} 688 689define <4 x i64> @test_mm256_cvttps_epi32(<8 x float> %a0) nounwind { 690; X32-LABEL: test_mm256_cvttps_epi32: 691; X32: # BB#0: 692; X32-NEXT: vcvttps2dq %ymm0, %ymm0 693; X32-NEXT: retl 694; 695; X64-LABEL: test_mm256_cvttps_epi32: 696; X64: # BB#0: 697; X64-NEXT: vcvttps2dq %ymm0, %ymm0 698; X64-NEXT: retq 699 %cvt = fptosi <8 x float> %a0 to <8 x i32> 700 %res 
= bitcast <8 x i32> %cvt to <4 x i64> 701 ret <4 x i64> %res 702} 703 704define <4 x double> @test_mm256_div_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 705; X32-LABEL: test_mm256_div_pd: 706; X32: # BB#0: 707; X32-NEXT: vdivpd %ymm1, %ymm0, %ymm0 708; X32-NEXT: retl 709; 710; X64-LABEL: test_mm256_div_pd: 711; X64: # BB#0: 712; X64-NEXT: vdivpd %ymm1, %ymm0, %ymm0 713; X64-NEXT: retq 714 %res = fdiv <4 x double> %a0, %a1 715 ret <4 x double> %res 716} 717 718define <8 x float> @test_mm256_div_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 719; X32-LABEL: test_mm256_div_ps: 720; X32: # BB#0: 721; X32-NEXT: vdivps %ymm1, %ymm0, %ymm0 722; X32-NEXT: retl 723; 724; X64-LABEL: test_mm256_div_ps: 725; X64: # BB#0: 726; X64-NEXT: vdivps %ymm1, %ymm0, %ymm0 727; X64-NEXT: retq 728 %res = fdiv <8 x float> %a0, %a1 729 ret <8 x float> %res 730} 731 732define <8 x float> @test_mm256_dp_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 733; X32-LABEL: test_mm256_dp_ps: 734; X32: # BB#0: 735; X32-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 736; X32-NEXT: retl 737; 738; X64-LABEL: test_mm256_dp_ps: 739; X64: # BB#0: 740; X64-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 741; X64-NEXT: retq 742 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) 743 ret <8 x float> %res 744} 745declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 746 747define i32 @test_mm256_extract_epi8(<4 x i64> %a0) nounwind { 748; X32-LABEL: test_mm256_extract_epi8: 749; X32: # BB#0: 750; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 751; X32-NEXT: vpextrb $15, %xmm0, %eax 752; X32-NEXT: movzbl %al, %eax 753; X32-NEXT: vzeroupper 754; X32-NEXT: retl 755; 756; X64-LABEL: test_mm256_extract_epi8: 757; X64: # BB#0: 758; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 759; X64-NEXT: vpextrb $15, %xmm0, %eax 760; X64-NEXT: movzbl %al, %eax 761; X64-NEXT: vzeroupper 762; X64-NEXT: retq 763 %arg0 = bitcast <4 x i64> %a0 to <32 x i8> 764 %ext = extractelement <32 x 
i8> %arg0, i32 31 765 %res = zext i8 %ext to i32 766 ret i32 %res 767} 768 769define i32 @test_mm256_extract_epi16(<4 x i64> %a0) nounwind { 770; X32-LABEL: test_mm256_extract_epi16: 771; X32: # BB#0: 772; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 773; X32-NEXT: vpextrw $3, %xmm0, %eax 774; X32-NEXT: movzwl %ax, %eax 775; X32-NEXT: vzeroupper 776; X32-NEXT: retl 777; 778; X64-LABEL: test_mm256_extract_epi16: 779; X64: # BB#0: 780; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 781; X64-NEXT: vpextrw $3, %xmm0, %eax 782; X64-NEXT: movzwl %ax, %eax 783; X64-NEXT: vzeroupper 784; X64-NEXT: retq 785 %arg0 = bitcast <4 x i64> %a0 to <16 x i16> 786 %ext = extractelement <16 x i16> %arg0, i32 11 787 %res = zext i16 %ext to i32 788 ret i32 %res 789} 790 791define i32 @test_mm256_extract_epi32(<4 x i64> %a0) nounwind { 792; X32-LABEL: test_mm256_extract_epi32: 793; X32: # BB#0: 794; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 795; X32-NEXT: vpextrd $1, %xmm0, %eax 796; X32-NEXT: vzeroupper 797; X32-NEXT: retl 798; 799; X64-LABEL: test_mm256_extract_epi32: 800; X64: # BB#0: 801; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 802; X64-NEXT: vpextrd $1, %xmm0, %eax 803; X64-NEXT: vzeroupper 804; X64-NEXT: retq 805 %arg0 = bitcast <4 x i64> %a0 to <8 x i32> 806 %res = extractelement <8 x i32> %arg0, i32 5 807 ret i32 %res 808} 809 810define i64 @test_mm256_extract_epi64(<4 x i64> %a0) nounwind { 811; X32-LABEL: test_mm256_extract_epi64: 812; X32: # BB#0: 813; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 814; X32-NEXT: vpextrd $2, %xmm0, %eax 815; X32-NEXT: vpextrd $3, %xmm0, %edx 816; X32-NEXT: vzeroupper 817; X32-NEXT: retl 818; 819; X64-LABEL: test_mm256_extract_epi64: 820; X64: # BB#0: 821; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 822; X64-NEXT: vpextrq $1, %xmm0, %rax 823; X64-NEXT: vzeroupper 824; X64-NEXT: retq 825 %res = extractelement <4 x i64> %a0, i32 3 826 ret i64 %res 827} 828 829define <2 x double> @test_mm256_extractf128_pd(<4 x double> %a0) nounwind { 830; X32-LABEL: 
test_mm256_extractf128_pd: 831; X32: # BB#0: 832; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 833; X32-NEXT: vzeroupper 834; X32-NEXT: retl 835; 836; X64-LABEL: test_mm256_extractf128_pd: 837; X64: # BB#0: 838; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 839; X64-NEXT: vzeroupper 840; X64-NEXT: retq 841 %res = shufflevector <4 x double> %a0, <4 x double> %a0, <2 x i32> <i32 2, i32 3> 842 ret <2 x double> %res 843} 844 845define <4 x float> @test_mm256_extractf128_ps(<8 x float> %a0) nounwind { 846; X32-LABEL: test_mm256_extractf128_ps: 847; X32: # BB#0: 848; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 849; X32-NEXT: vzeroupper 850; X32-NEXT: retl 851; 852; X64-LABEL: test_mm256_extractf128_ps: 853; X64: # BB#0: 854; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 855; X64-NEXT: vzeroupper 856; X64-NEXT: retq 857 %res = shufflevector <8 x float> %a0, <8 x float> %a0, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 858 ret <4 x float> %res 859} 860 861define <2 x i64> @test_mm256_extractf128_si256(<4 x i64> %a0) nounwind { 862; X32-LABEL: test_mm256_extractf128_si256: 863; X32: # BB#0: 864; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 865; X32-NEXT: vzeroupper 866; X32-NEXT: retl 867; 868; X64-LABEL: test_mm256_extractf128_si256: 869; X64: # BB#0: 870; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 871; X64-NEXT: vzeroupper 872; X64-NEXT: retq 873 %res = shufflevector <4 x i64> %a0, <4 x i64> %a0, <2 x i32> <i32 2, i32 3> 874 ret <2 x i64> %res 875} 876 877define <4 x double> @test_mm256_floor_pd(<4 x double> %a0) nounwind { 878; X32-LABEL: test_mm256_floor_pd: 879; X32: # BB#0: 880; X32-NEXT: vroundpd $1, %ymm0, %ymm0 881; X32-NEXT: retl 882; 883; X64-LABEL: test_mm256_floor_pd: 884; X64: # BB#0: 885; X64-NEXT: vroundpd $1, %ymm0, %ymm0 886; X64-NEXT: retq 887 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 1) 888 ret <4 x double> %res 889} 890 891define <8 x float> @test_mm256_floor_ps(<8 x float> %a0) nounwind { 892; X32-LABEL: test_mm256_floor_ps: 893; X32: # BB#0: 894; 
X32-NEXT: vroundps $1, %ymm0, %ymm0 895; X32-NEXT: retl 896; 897; X64-LABEL: test_mm256_floor_ps: 898; X64: # BB#0: 899; X64-NEXT: vroundps $1, %ymm0, %ymm0 900; X64-NEXT: retq 901 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 1) 902 ret <8 x float> %res 903} 904 905define <4 x double> @test_mm256_hadd_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 906; X32-LABEL: test_mm256_hadd_pd: 907; X32: # BB#0: 908; X32-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 909; X32-NEXT: retl 910; 911; X64-LABEL: test_mm256_hadd_pd: 912; X64: # BB#0: 913; X64-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 914; X64-NEXT: retq 915 %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) 916 ret <4 x double> %res 917} 918declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone 919 920define <8 x float> @test_mm256_hadd_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 921; X32-LABEL: test_mm256_hadd_ps: 922; X32: # BB#0: 923; X32-NEXT: vhaddps %ymm1, %ymm0, %ymm0 924; X32-NEXT: retl 925; 926; X64-LABEL: test_mm256_hadd_ps: 927; X64: # BB#0: 928; X64-NEXT: vhaddps %ymm1, %ymm0, %ymm0 929; X64-NEXT: retq 930 %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) 931 ret <8 x float> %res 932} 933declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone 934 935define <4 x double> @test_mm256_hsub_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 936; X32-LABEL: test_mm256_hsub_pd: 937; X32: # BB#0: 938; X32-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 939; X32-NEXT: retl 940; 941; X64-LABEL: test_mm256_hsub_pd: 942; X64: # BB#0: 943; X64-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 944; X64-NEXT: retq 945 %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) 946 ret <4 x double> %res 947} 948declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 949 950define <8 x float> @test_mm256_hsub_ps(<8 x float> %a0, <8 x 
float> %a1) nounwind { 951; X32-LABEL: test_mm256_hsub_ps: 952; X32: # BB#0: 953; X32-NEXT: vhsubps %ymm1, %ymm0, %ymm0 954; X32-NEXT: retl 955; 956; X64-LABEL: test_mm256_hsub_ps: 957; X64: # BB#0: 958; X64-NEXT: vhsubps %ymm1, %ymm0, %ymm0 959; X64-NEXT: retq 960 %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) 961 ret <8 x float> %res 962} 963declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 964 965define <4 x i64> @test_mm256_insert_epi8(<4 x i64> %a0, i8 %a1) nounwind { 966; X32-LABEL: test_mm256_insert_epi8: 967; X32: # BB#0: 968; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 969; X32-NEXT: vpinsrb $4, %eax, %xmm0, %xmm1 970; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 971; X32-NEXT: retl 972; 973; X64-LABEL: test_mm256_insert_epi8: 974; X64: # BB#0: 975; X64-NEXT: movzbl %dil, %eax 976; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm1 977; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 978; X64-NEXT: retq 979 %arg0 = bitcast <4 x i64> %a0 to <32 x i8> 980 %res = insertelement <32 x i8> %arg0, i8 %a1, i32 4 981 %bc = bitcast <32 x i8> %res to <4 x i64> 982 ret <4 x i64> %bc 983} 984 985define <4 x i64> @test_mm256_insert_epi16(<4 x i64> %a0, i16 %a1) nounwind { 986; X32-LABEL: test_mm256_insert_epi16: 987; X32: # BB#0: 988; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 989; X32-NEXT: vextractf128 $1, %ymm0, %xmm1 990; X32-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 991; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 992; X32-NEXT: retl 993; 994; X64-LABEL: test_mm256_insert_epi16: 995; X64: # BB#0: 996; X64-NEXT: vextractf128 $1, %ymm0, %xmm1 997; X64-NEXT: vpinsrw $6, %edi, %xmm1, %xmm1 998; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 999; X64-NEXT: retq 1000 %arg0 = bitcast <4 x i64> %a0 to <16 x i16> 1001 %res = insertelement <16 x i16> %arg0, i16 %a1, i32 14 1002 %bc = bitcast <16 x i16> %res to <4 x i64> 1003 ret <4 x i64> %bc 1004} 1005 1006define <4 x i64> 
@test_mm256_insert_epi32(<4 x i64> %a0, i32 %a1) nounwind { 1007; X32-LABEL: test_mm256_insert_epi32: 1008; X32: # BB#0: 1009; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm1 1010; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1011; X32-NEXT: retl 1012; 1013; X64-LABEL: test_mm256_insert_epi32: 1014; X64: # BB#0: 1015; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm1 1016; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1017; X64-NEXT: retq 1018 %arg0 = bitcast <4 x i64> %a0 to <8 x i32> 1019 %res = insertelement <8 x i32> %arg0, i32 %a1, i32 3 1020 %bc = bitcast <8 x i32> %res to <4 x i64> 1021 ret <4 x i64> %bc 1022} 1023 1024define <4 x i64> @test_mm256_insert_epi64(<4 x i64> %a0, i64 %a1) nounwind { 1025; X32-LABEL: test_mm256_insert_epi64: 1026; X32: # BB#0: 1027; X32-NEXT: vextractf128 $1, %ymm0, %xmm1 1028; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 1029; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm2 1030; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1031; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1032; X32-NEXT: retl 1033; 1034; X64-LABEL: test_mm256_insert_epi64: 1035; X64: # BB#0: 1036; X64-NEXT: vextractf128 $1, %ymm0, %xmm1 1037; X64-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1 1038; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1039; X64-NEXT: retq 1040 %res = insertelement <4 x i64> %a0, i64 %a1, i32 3 1041 ret <4 x i64> %res 1042} 1043 1044define <4 x double> @test_mm256_insertf128_pd(<4 x double> %a0, <2 x double> %a1) nounwind { 1045; X32-LABEL: test_mm256_insertf128_pd: 1046; X32: # BB#0: 1047; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 1048; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 1049; X32-NEXT: retl 1050; 1051; X64-LABEL: test_mm256_insertf128_pd: 1052; X64: # BB#0: 1053; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 1054; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 1055; X64-NEXT: retq 1056 %ext = shufflevector <2 x double> %a1, <2 x double> %a1, <4 x i32> <i32 
0, i32 1, i32 undef, i32 undef> 1057 %res = shufflevector <4 x double> %a0, <4 x double> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1058 ret <4 x double> %res 1059} 1060 1061define <8 x float> @test_mm256_insertf128_ps(<8 x float> %a0, <4 x float> %a1) nounwind { 1062; X32-LABEL: test_mm256_insertf128_ps: 1063; X32: # BB#0: 1064; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1065; X32-NEXT: retl 1066; 1067; X64-LABEL: test_mm256_insertf128_ps: 1068; X64: # BB#0: 1069; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1070; X64-NEXT: retq 1071 %ext = shufflevector <4 x float> %a1, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1072 %res = shufflevector <8 x float> %a0, <8 x float> %ext, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1073 ret <8 x float> %res 1074} 1075 1076define <4 x i64> @test_mm256_insertf128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind { 1077; X32-LABEL: test_mm256_insertf128_si256: 1078; X32: # BB#0: 1079; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 1080; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 1081; X32-NEXT: retl 1082; 1083; X64-LABEL: test_mm256_insertf128_si256: 1084; X64: # BB#0: 1085; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 1086; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3] 1087; X64-NEXT: retq 1088 %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1089 %res = shufflevector <4 x i64> %a0, <4 x i64> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1090 ret <4 x i64> %res 1091} 1092 1093define <4 x i64> @test_mm256_lddqu_si256(<4 x i64>* %a0) nounwind { 1094; X32-LABEL: test_mm256_lddqu_si256: 1095; X32: # BB#0: 1096; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1097; X32-NEXT: vlddqu (%eax), %ymm0 1098; X32-NEXT: retl 1099; 1100; X64-LABEL: test_mm256_lddqu_si256: 1101; X64: # BB#0: 1102; X64-NEXT: vlddqu (%rdi), %ymm0 1103; X64-NEXT: retq 1104 %arg0 = bitcast <4 x i64>* %a0 to i8* 1105 
%res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %arg0) 1106 %bc = bitcast <32 x i8> %res to <4 x i64> 1107 ret <4 x i64> %bc 1108} 1109declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readnone 1110 1111define <4 x double> @test_mm256_load_pd(double* %a0) nounwind { 1112; X32-LABEL: test_mm256_load_pd: 1113; X32: # BB#0: 1114; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1115; X32-NEXT: vmovaps (%eax), %ymm0 1116; X32-NEXT: retl 1117; 1118; X64-LABEL: test_mm256_load_pd: 1119; X64: # BB#0: 1120; X64-NEXT: vmovaps (%rdi), %ymm0 1121; X64-NEXT: retq 1122 %arg0 = bitcast double* %a0 to <4 x double>* 1123 %res = load <4 x double>, <4 x double>* %arg0, align 32 1124 ret <4 x double> %res 1125} 1126 1127define <8 x float> @test_mm256_load_ps(float* %a0) nounwind { 1128; X32-LABEL: test_mm256_load_ps: 1129; X32: # BB#0: 1130; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1131; X32-NEXT: vmovaps (%eax), %ymm0 1132; X32-NEXT: retl 1133; 1134; X64-LABEL: test_mm256_load_ps: 1135; X64: # BB#0: 1136; X64-NEXT: vmovaps (%rdi), %ymm0 1137; X64-NEXT: retq 1138 %arg0 = bitcast float* %a0 to <8 x float>* 1139 %res = load <8 x float>, <8 x float>* %arg0, align 32 1140 ret <8 x float> %res 1141} 1142 1143define <4 x i64> @test_mm256_load_si256(<4 x i64>* %a0) nounwind { 1144; X32-LABEL: test_mm256_load_si256: 1145; X32: # BB#0: 1146; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1147; X32-NEXT: vmovaps (%eax), %ymm0 1148; X32-NEXT: retl 1149; 1150; X64-LABEL: test_mm256_load_si256: 1151; X64: # BB#0: 1152; X64-NEXT: vmovaps (%rdi), %ymm0 1153; X64-NEXT: retq 1154 %res = load <4 x i64>, <4 x i64>* %a0, align 32 1155 ret <4 x i64> %res 1156} 1157 1158define <4 x double> @test_mm256_loadu_pd(double* %a0) nounwind { 1159; X32-LABEL: test_mm256_loadu_pd: 1160; X32: # BB#0: 1161; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1162; X32-NEXT: vmovups (%eax), %ymm0 1163; X32-NEXT: retl 1164; 1165; X64-LABEL: test_mm256_loadu_pd: 1166; X64: # BB#0: 1167; X64-NEXT: vmovups (%rdi), %ymm0 1168; X64-NEXT: retq 1169 
%arg0 = bitcast double* %a0 to <4 x double>* 1170 %res = load <4 x double>, <4 x double>* %arg0, align 1 1171 ret <4 x double> %res 1172} 1173 1174define <8 x float> @test_mm256_loadu_ps(float* %a0) nounwind { 1175; X32-LABEL: test_mm256_loadu_ps: 1176; X32: # BB#0: 1177; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1178; X32-NEXT: vmovups (%eax), %ymm0 1179; X32-NEXT: retl 1180; 1181; X64-LABEL: test_mm256_loadu_ps: 1182; X64: # BB#0: 1183; X64-NEXT: vmovups (%rdi), %ymm0 1184; X64-NEXT: retq 1185 %arg0 = bitcast float* %a0 to <8 x float>* 1186 %res = load <8 x float>, <8 x float>* %arg0, align 1 1187 ret <8 x float> %res 1188} 1189 1190define <4 x i64> @test_mm256_loadu_si256(<4 x i64>* %a0) nounwind { 1191; X32-LABEL: test_mm256_loadu_si256: 1192; X32: # BB#0: 1193; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1194; X32-NEXT: vmovups (%eax), %ymm0 1195; X32-NEXT: retl 1196; 1197; X64-LABEL: test_mm256_loadu_si256: 1198; X64: # BB#0: 1199; X64-NEXT: vmovups (%rdi), %ymm0 1200; X64-NEXT: retq 1201 %res = load <4 x i64>, <4 x i64>* %a0, align 1 1202 ret <4 x i64> %res 1203} 1204 1205define <8 x float> @test_mm256_loadu2_m128(float* %a0, float* %a1) nounwind { 1206; X32-LABEL: test_mm256_loadu2_m128: 1207; X32: # BB#0: 1208; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1209; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1210; X32-NEXT: vmovups (%eax), %xmm0 1211; X32-NEXT: vinsertf128 $1, (%ecx), %ymm0, %ymm0 1212; X32-NEXT: retl 1213; 1214; X64-LABEL: test_mm256_loadu2_m128: 1215; X64: # BB#0: 1216; X64-NEXT: vmovups (%rsi), %xmm0 1217; X64-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 1218; X64-NEXT: retq 1219 %arg0 = bitcast float* %a0 to <4 x float>* 1220 %hi4 = load <4 x float>, <4 x float>* %arg0, align 1 1221 %hi8 = shufflevector <4 x float> %hi4, <4 x float> %hi4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1222 %arg1 = bitcast float* %a1 to <4 x float>* 1223 %lo4 = load <4 x float>, <4 x float>* %arg1, align 1 1224 %lo8 = shufflevector <4 x float> 
%lo4, <4 x float> %lo4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1225 %res = shufflevector <8 x float> %lo8, <8 x float> %hi8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1226 ret <8 x float> %res 1227} 1228 1229define <4 x double> @test_mm256_loadu2_m128d(double* %a0, double* %a1) nounwind { 1230; X32-LABEL: test_mm256_loadu2_m128d: 1231; X32: # BB#0: 1232; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1233; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1234; X32-NEXT: vmovups (%eax), %xmm0 1235; X32-NEXT: vinsertf128 $1, (%ecx), %ymm0, %ymm0 1236; X32-NEXT: retl 1237; 1238; X64-LABEL: test_mm256_loadu2_m128d: 1239; X64: # BB#0: 1240; X64-NEXT: vmovups (%rsi), %xmm0 1241; X64-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 1242; X64-NEXT: retq 1243 %arg0 = bitcast double* %a0 to <2 x double>* 1244 %hi2 = load <2 x double>, <2 x double>* %arg0, align 1 1245 %hi4 = shufflevector <2 x double> %hi2, <2 x double> %hi2, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1246 %arg1 = bitcast double* %a1 to <2 x double>* 1247 %lo2 = load <2 x double>, <2 x double>* %arg1, align 1 1248 %lo4 = shufflevector <2 x double> %lo2, <2 x double> %lo2, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1249 %res = shufflevector <4 x double> %lo4, <4 x double> %hi4, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1250 ret <4 x double> %res 1251} 1252 1253define <4 x i64> @test_mm256_loadu2_m128i(i64* %a0, i64* %a1) nounwind { 1254; X32-LABEL: test_mm256_loadu2_m128i: 1255; X32: # BB#0: 1256; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1257; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1258; X32-NEXT: vmovups (%eax), %xmm0 1259; X32-NEXT: vinsertf128 $1, (%ecx), %ymm0, %ymm0 1260; X32-NEXT: retl 1261; 1262; X64-LABEL: test_mm256_loadu2_m128i: 1263; X64: # BB#0: 1264; X64-NEXT: vmovups (%rsi), %xmm0 1265; X64-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 1266; X64-NEXT: retq 1267 %arg0 = bitcast i64* %a0 to <2 x i64>* 1268 %hi2 = load <2 x i64>, <2 x i64>* %arg0, align 1 
1269 %hi4 = shufflevector <2 x i64> %hi2, <2 x i64> %hi2, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1270 %arg1 = bitcast i64* %a1 to <2 x i64>* 1271 %lo2 = load <2 x i64>, <2 x i64>* %arg1, align 1 1272 %lo4 = shufflevector <2 x i64> %lo2, <2 x i64> %lo2, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1273 %res = shufflevector <4 x i64> %lo4, <4 x i64> %hi4, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1274 ret <4 x i64> %res 1275} 1276 1277define <2 x double> @test_mm_maskload_pd(double* %a0, <2 x i64> %a1) nounwind { 1278; X32-LABEL: test_mm_maskload_pd: 1279; X32: # BB#0: 1280; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1281; X32-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 1282; X32-NEXT: retl 1283; 1284; X64-LABEL: test_mm_maskload_pd: 1285; X64: # BB#0: 1286; X64-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm0 1287; X64-NEXT: retq 1288 %arg0 = bitcast double* %a0 to i8* 1289 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %arg0, <2 x i64> %a1) 1290 ret <2 x double> %res 1291} 1292declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readnone 1293 1294define <4 x double> @test_mm256_maskload_pd(double* %a0, <4 x i64> %a1) nounwind { 1295; X32-LABEL: test_mm256_maskload_pd: 1296; X32: # BB#0: 1297; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1298; X32-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 1299; X32-NEXT: retl 1300; 1301; X64-LABEL: test_mm256_maskload_pd: 1302; X64: # BB#0: 1303; X64-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 1304; X64-NEXT: retq 1305 %arg0 = bitcast double* %a0 to i8* 1306 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %arg0, <4 x i64> %a1) 1307 ret <4 x double> %res 1308} 1309declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readnone 1310 1311define <4 x float> @test_mm_maskload_ps(float* %a0, <2 x i64> %a1) nounwind { 1312; X32-LABEL: test_mm_maskload_ps: 1313; X32: # BB#0: 1314; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1315; X32-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 1316; X32-NEXT: retl 1317; 1318; X64-LABEL: 
test_mm_maskload_ps: 1319; X64: # BB#0: 1320; X64-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 1321; X64-NEXT: retq 1322 %arg0 = bitcast float* %a0 to i8* 1323 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 1324 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %arg0, <4 x i32> %arg1) 1325 ret <4 x float> %res 1326} 1327declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readnone 1328 1329define <8 x float> @test_mm256_maskload_ps(float* %a0, <4 x i64> %a1) nounwind { 1330; X32-LABEL: test_mm256_maskload_ps: 1331; X32: # BB#0: 1332; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1333; X32-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 1334; X32-NEXT: retl 1335; 1336; X64-LABEL: test_mm256_maskload_ps: 1337; X64: # BB#0: 1338; X64-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0 1339; X64-NEXT: retq 1340 %arg0 = bitcast float* %a0 to i8* 1341 %arg1 = bitcast <4 x i64> %a1 to <8 x i32> 1342 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %arg0, <8 x i32> %arg1) 1343 ret <8 x float> %res 1344} 1345declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readnone 1346 1347define void @test_mm_maskstore_pd(double* %a0, <2 x i64> %a1, <2 x double> %a2) nounwind { 1348; X32-LABEL: test_mm_maskstore_pd: 1349; X32: # BB#0: 1350; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1351; X32-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) 1352; X32-NEXT: retl 1353; 1354; X64-LABEL: test_mm_maskstore_pd: 1355; X64: # BB#0: 1356; X64-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) 1357; X64-NEXT: retq 1358 %arg0 = bitcast double* %a0 to i8* 1359 call void @llvm.x86.avx.maskstore.pd(i8* %arg0, <2 x i64> %a1, <2 x double> %a2) 1360 ret void 1361} 1362declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind readnone 1363 1364define void @test_mm256_maskstore_pd(double* %a0, <4 x i64> %a1, <4 x double> %a2) nounwind { 1365; X32-LABEL: test_mm256_maskstore_pd: 1366; X32: # BB#0: 1367; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1368; X32-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) 1369; 
X32-NEXT: vzeroupper 1370; X32-NEXT: retl 1371; 1372; X64-LABEL: test_mm256_maskstore_pd: 1373; X64: # BB#0: 1374; X64-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) 1375; X64-NEXT: vzeroupper 1376; X64-NEXT: retq 1377 %arg0 = bitcast double* %a0 to i8* 1378 call void @llvm.x86.avx.maskstore.pd.256(i8* %arg0, <4 x i64> %a1, <4 x double> %a2) 1379 ret void 1380} 1381declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind readnone 1382 1383define void @test_mm_maskstore_ps(float* %a0, <2 x i64> %a1, <4 x float> %a2) nounwind { 1384; X32-LABEL: test_mm_maskstore_ps: 1385; X32: # BB#0: 1386; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1387; X32-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) 1388; X32-NEXT: retl 1389; 1390; X64-LABEL: test_mm_maskstore_ps: 1391; X64: # BB#0: 1392; X64-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) 1393; X64-NEXT: retq 1394 %arg0 = bitcast float* %a0 to i8* 1395 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 1396 call void @llvm.x86.avx.maskstore.ps(i8* %arg0, <4 x i32> %arg1, <4 x float> %a2) 1397 ret void 1398} 1399declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind readnone 1400 1401define void @test_mm256_maskstore_ps(float* %a0, <4 x i64> %a1, <8 x float> %a2) nounwind { 1402; X32-LABEL: test_mm256_maskstore_ps: 1403; X32: # BB#0: 1404; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1405; X32-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) 1406; X32-NEXT: vzeroupper 1407; X32-NEXT: retl 1408; 1409; X64-LABEL: test_mm256_maskstore_ps: 1410; X64: # BB#0: 1411; X64-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) 1412; X64-NEXT: vzeroupper 1413; X64-NEXT: retq 1414 %arg0 = bitcast float* %a0 to i8* 1415 %arg1 = bitcast <4 x i64> %a1 to <8 x i32> 1416 call void @llvm.x86.avx.maskstore.ps.256(i8* %arg0, <8 x i32> %arg1, <8 x float> %a2) 1417 ret void 1418} 1419declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind readnone 1420 1421define <4 x double> @test_mm256_max_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 1422; 
X32-LABEL: test_mm256_max_pd: 1423; X32: # BB#0: 1424; X32-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 1425; X32-NEXT: retl 1426; 1427; X64-LABEL: test_mm256_max_pd: 1428; X64: # BB#0: 1429; X64-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 1430; X64-NEXT: retq 1431 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) 1432 ret <4 x double> %res 1433} 1434declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone 1435 1436define <8 x float> @test_mm256_max_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 1437; X32-LABEL: test_mm256_max_ps: 1438; X32: # BB#0: 1439; X32-NEXT: vmaxps %ymm1, %ymm0, %ymm0 1440; X32-NEXT: retl 1441; 1442; X64-LABEL: test_mm256_max_ps: 1443; X64: # BB#0: 1444; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm0 1445; X64-NEXT: retq 1446 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) 1447 ret <8 x float> %res 1448} 1449declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 1450 1451define <4 x double> @test_mm256_min_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 1452; X32-LABEL: test_mm256_min_pd: 1453; X32: # BB#0: 1454; X32-NEXT: vminpd %ymm1, %ymm0, %ymm0 1455; X32-NEXT: retl 1456; 1457; X64-LABEL: test_mm256_min_pd: 1458; X64: # BB#0: 1459; X64-NEXT: vminpd %ymm1, %ymm0, %ymm0 1460; X64-NEXT: retq 1461 %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) 1462 ret <4 x double> %res 1463} 1464declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 1465 1466define <8 x float> @test_mm256_min_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 1467; X32-LABEL: test_mm256_min_ps: 1468; X32: # BB#0: 1469; X32-NEXT: vminps %ymm1, %ymm0, %ymm0 1470; X32-NEXT: retl 1471; 1472; X64-LABEL: test_mm256_min_ps: 1473; X64: # BB#0: 1474; X64-NEXT: vminps %ymm1, %ymm0, %ymm0 1475; X64-NEXT: retq 1476 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) 1477 ret 
<8 x float> %res 1478} 1479declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 1480 1481define <4 x double> @test_mm256_movedup_pd(<4 x double> %a0) nounwind { 1482; X32-LABEL: test_mm256_movedup_pd: 1483; X32: # BB#0: 1484; X32-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 1485; X32-NEXT: retl 1486; 1487; X64-LABEL: test_mm256_movedup_pd: 1488; X64: # BB#0: 1489; X64-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 1490; X64-NEXT: retq 1491 %res = shufflevector <4 x double> %a0, <4 x double> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1492 ret <4 x double> %res 1493} 1494 1495define <8 x float> @test_mm256_movehdup_ps(<8 x float> %a0) nounwind { 1496; X32-LABEL: test_mm256_movehdup_ps: 1497; X32: # BB#0: 1498; X32-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1499; X32-NEXT: retl 1500; 1501; X64-LABEL: test_mm256_movehdup_ps: 1502; X64: # BB#0: 1503; X64-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] 1504; X64-NEXT: retq 1505 %res = shufflevector <8 x float> %a0, <8 x float> %a0, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1506 ret <8 x float> %res 1507} 1508 1509define <8 x float> @test_mm256_moveldup_ps(<8 x float> %a0) nounwind { 1510; X32-LABEL: test_mm256_moveldup_ps: 1511; X32: # BB#0: 1512; X32-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1513; X32-NEXT: retl 1514; 1515; X64-LABEL: test_mm256_moveldup_ps: 1516; X64: # BB#0: 1517; X64-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] 1518; X64-NEXT: retq 1519 %res = shufflevector <8 x float> %a0, <8 x float> %a0, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1520 ret <8 x float> %res 1521} 1522 1523define i32 @test_mm256_movemask_pd(<4 x double> %a0) nounwind { 1524; X32-LABEL: test_mm256_movemask_pd: 1525; X32: # BB#0: 1526; X32-NEXT: vmovmskpd %ymm0, %eax 1527; X32-NEXT: vzeroupper 1528; X32-NEXT: retl 1529; 1530; X64-LABEL: test_mm256_movemask_pd: 1531; X64: # BB#0: 1532; X64-NEXT: vmovmskpd %ymm0, %eax 
1533; X64-NEXT: vzeroupper 1534; X64-NEXT: retq 1535 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) 1536 ret i32 %res 1537} 1538declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone 1539 1540define i32 @test_mm256_movemask_ps(<8 x float> %a0) nounwind { 1541; X32-LABEL: test_mm256_movemask_ps: 1542; X32: # BB#0: 1543; X32-NEXT: vmovmskps %ymm0, %eax 1544; X32-NEXT: vzeroupper 1545; X32-NEXT: retl 1546; 1547; X64-LABEL: test_mm256_movemask_ps: 1548; X64: # BB#0: 1549; X64-NEXT: vmovmskps %ymm0, %eax 1550; X64-NEXT: vzeroupper 1551; X64-NEXT: retq 1552 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) 1553 ret i32 %res 1554} 1555declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone 1556 1557define <4 x double> @test_mm256_mul_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 1558; X32-LABEL: test_mm256_mul_pd: 1559; X32: # BB#0: 1560; X32-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1561; X32-NEXT: retl 1562; 1563; X64-LABEL: test_mm256_mul_pd: 1564; X64: # BB#0: 1565; X64-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1566; X64-NEXT: retq 1567 %res = fmul <4 x double> %a0, %a1 1568 ret <4 x double> %res 1569} 1570 1571define <8 x float> @test_mm256_mul_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 1572; X32-LABEL: test_mm256_mul_ps: 1573; X32: # BB#0: 1574; X32-NEXT: vmulps %ymm1, %ymm0, %ymm0 1575; X32-NEXT: retl 1576; 1577; X64-LABEL: test_mm256_mul_ps: 1578; X64: # BB#0: 1579; X64-NEXT: vmulps %ymm1, %ymm0, %ymm0 1580; X64-NEXT: retq 1581 %res = fmul <8 x float> %a0, %a1 1582 ret <8 x float> %res 1583} 1584 1585define <4 x double> @test_mm256_or_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 1586; X32-LABEL: test_mm256_or_pd: 1587; X32: # BB#0: 1588; X32-NEXT: vorps %ymm1, %ymm0, %ymm0 1589; X32-NEXT: retl 1590; 1591; X64-LABEL: test_mm256_or_pd: 1592; X64: # BB#0: 1593; X64-NEXT: vorps %ymm1, %ymm0, %ymm0 1594; X64-NEXT: retq 1595 %1 = bitcast <4 x double> %a0 to <4 x i64> 1596 %2 = bitcast <4 x double> %a1 to <4 x i64> 
1597 %res = or <4 x i64> %1, %2 1598 %bc = bitcast <4 x i64> %res to <4 x double> 1599 ret <4 x double> %bc 1600} 1601 1602define <8 x float> @test_mm256_or_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 1603; X32-LABEL: test_mm256_or_ps: 1604; X32: # BB#0: 1605; X32-NEXT: vorps %ymm1, %ymm0, %ymm0 1606; X32-NEXT: retl 1607; 1608; X64-LABEL: test_mm256_or_ps: 1609; X64: # BB#0: 1610; X64-NEXT: vorps %ymm1, %ymm0, %ymm0 1611; X64-NEXT: retq 1612 %1 = bitcast <8 x float> %a0 to <8 x i32> 1613 %2 = bitcast <8 x float> %a1 to <8 x i32> 1614 %res = or <8 x i32> %1, %2 1615 %bc = bitcast <8 x i32> %res to <8 x float> 1616 ret <8 x float> %bc 1617} 1618 1619define <2 x double> @test_mm_permute_pd(<2 x double> %a0) nounwind { 1620; X32-LABEL: test_mm_permute_pd: 1621; X32: # BB#0: 1622; X32-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1623; X32-NEXT: retl 1624; 1625; X64-LABEL: test_mm_permute_pd: 1626; X64: # BB#0: 1627; X64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 1628; X64-NEXT: retq 1629 %res = shufflevector <2 x double> %a0, <2 x double> %a0, <2 x i32> <i32 1, i32 0> 1630 ret <2 x double> %res 1631} 1632 1633define <4 x double> @test_mm256_permute_pd(<4 x double> %a0) nounwind { 1634; X32-LABEL: test_mm256_permute_pd: 1635; X32: # BB#0: 1636; X32-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1637; X32-NEXT: retl 1638; 1639; X64-LABEL: test_mm256_permute_pd: 1640; X64: # BB#0: 1641; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1642; X64-NEXT: retq 1643 %res = shufflevector <4 x double> %a0, <4 x double> %a0, <4 x i32> <i32 1, i32 0, i32 3, i32 2> 1644 ret <4 x double> %res 1645} 1646 1647define <4 x float> @test_mm_permute_ps(<4 x float> %a0) nounwind { 1648; X32-LABEL: test_mm_permute_ps: 1649; X32: # BB#0: 1650; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1651; X32-NEXT: retl 1652; 1653; X64-LABEL: test_mm_permute_ps: 1654; X64: # BB#0: 1655; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1656; X64-NEXT: retq 1657 %res = shufflevector <4 x float> %a0, <4 
x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1658 ret <4 x float> %res 1659} 1660 1661define <4 x float> @test2_mm_permute_ps(<4 x float> %a0) nounwind { 1662; X32-LABEL: test2_mm_permute_ps: 1663; X32: # BB#0: 1664; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,2,3] 1665; X32-NEXT: retl 1666; 1667; X64-LABEL: test2_mm_permute_ps: 1668; X64: # BB#0: 1669; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,2,3] 1670; X64-NEXT: retq 1671 %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 2, i32 1, i32 2, i32 3> 1672 ret <4 x float> %res 1673} 1674 1675define <8 x float> @test_mm256_permute_ps(<8 x float> %a0) nounwind { 1676; X32-LABEL: test_mm256_permute_ps: 1677; X32: # BB#0: 1678; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1679; X32-NEXT: retl 1680; 1681; X64-LABEL: test_mm256_permute_ps: 1682; X64: # BB#0: 1683; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] 1684; X64-NEXT: retq 1685 %res = shufflevector <8 x float> %a0, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1686 ret <8 x float> %res 1687} 1688 1689define <4 x double> @test_mm256_permute2f128_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 1690; X32-LABEL: test_mm256_permute2f128_pd: 1691; X32: # BB#0: 1692; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm1[0,1] 1693; X32-NEXT: retl 1694; 1695; X64-LABEL: test_mm256_permute2f128_pd: 1696; X64: # BB#0: 1697; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm1[0,1] 1698; X64-NEXT: retq 1699 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 44) 1700 ret <4 x double> %res 1701} 1702declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 1703 1704; PR26667 1705define <8 x float> @test_mm256_permute2f128_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 1706; X32-LABEL: test_mm256_permute2f128_ps: 1707; X32: # BB#0: 1708; X32-NEXT: vmovaps %ymm1, %ymm0 1709; X32-NEXT: retl 1710; 1711; 
X64-LABEL: test_mm256_permute2f128_ps: 1712; X64: # BB#0: 1713; X64-NEXT: vmovaps %ymm1, %ymm0 1714; X64-NEXT: retq 1715 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 50) 1716 ret <8 x float> %res 1717} 1718declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 1719 1720define <4 x i64> @test_mm256_permute2f128_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind { 1721; X32-LABEL: test_mm256_permute2f128_si256: 1722; X32: # BB#0: 1723; X32-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1] 1724; X32-NEXT: retl 1725; 1726; X64-LABEL: test_mm256_permute2f128_si256: 1727; X64: # BB#0: 1728; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1] 1729; X64-NEXT: retq 1730 %1 = bitcast <4 x i64> %a0 to <8 x i32> 1731 %2 = bitcast <4 x i64> %a1 to <8 x i32> 1732 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %1, <8 x i32> %2, i8 35) 1733 %bc = bitcast <8 x i32> %res to <4 x i64> 1734 ret <4 x i64> %bc 1735} 1736declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone 1737 1738define <2 x double> @test_mm_permutevar_pd(<2 x double> %a0, <2 x i64> %a1) nounwind { 1739; X32-LABEL: test_mm_permutevar_pd: 1740; X32: # BB#0: 1741; X32-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 1742; X32-NEXT: retl 1743; 1744; X64-LABEL: test_mm_permutevar_pd: 1745; X64: # BB#0: 1746; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 1747; X64-NEXT: retq 1748 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) 1749 ret <2 x double> %res 1750} 1751declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone 1752 1753define <4 x double> @test_mm256_permutevar_pd(<4 x double> %a0, <4 x i64> %a1) nounwind { 1754; X32-LABEL: test_mm256_permutevar_pd: 1755; X32: # BB#0: 1756; X32-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 1757; X32-NEXT: retl 1758; 1759; X64-LABEL: test_mm256_permutevar_pd: 1760; X64: # BB#0: 1761; X64-NEXT: 
vpermilpd %ymm1, %ymm0, %ymm0 1762; X64-NEXT: retq 1763 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) 1764 ret <4 x double> %res 1765} 1766declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 1767 1768define <4 x float> @test_mm_permutevar_ps(<4 x float> %a0, <2 x i64> %a1) nounwind { 1769; X32-LABEL: test_mm_permutevar_ps: 1770; X32: # BB#0: 1771; X32-NEXT: vpermilps %xmm1, %xmm0, %xmm0 1772; X32-NEXT: retl 1773; 1774; X64-LABEL: test_mm_permutevar_ps: 1775; X64: # BB#0: 1776; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm0 1777; X64-NEXT: retq 1778 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 1779 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %arg1) 1780 ret <4 x float> %res 1781} 1782declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 1783 1784define <8 x float> @test_mm256_permutevar_ps(<8 x float> %a0, <4 x i64> %a1) nounwind { 1785; X32-LABEL: test_mm256_permutevar_ps: 1786; X32: # BB#0: 1787; X32-NEXT: vpermilps %ymm1, %ymm0, %ymm0 1788; X32-NEXT: retl 1789; 1790; X64-LABEL: test_mm256_permutevar_ps: 1791; X64: # BB#0: 1792; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm0 1793; X64-NEXT: retq 1794 %arg1 = bitcast <4 x i64> %a1 to <8 x i32> 1795 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %arg1) 1796 ret <8 x float> %res 1797} 1798declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 1799 1800define <8 x float> @test_mm256_rcp_ps(<8 x float> %a0) nounwind { 1801; X32-LABEL: test_mm256_rcp_ps: 1802; X32: # BB#0: 1803; X32-NEXT: vrcpps %ymm0, %ymm0 1804; X32-NEXT: retl 1805; 1806; X64-LABEL: test_mm256_rcp_ps: 1807; X64: # BB#0: 1808; X64-NEXT: vrcpps %ymm0, %ymm0 1809; X64-NEXT: retq 1810 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) 1811 ret <8 x float> %res 1812} 1813declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) 
nounwind readnone 1814 1815define <4 x double> @test_mm256_round_pd(<4 x double> %a0) nounwind { 1816; X32-LABEL: test_mm256_round_pd: 1817; X32: # BB#0: 1818; X32-NEXT: vroundpd $4, %ymm0, %ymm0 1819; X32-NEXT: retl 1820; 1821; X64-LABEL: test_mm256_round_pd: 1822; X64: # BB#0: 1823; X64-NEXT: vroundpd $4, %ymm0, %ymm0 1824; X64-NEXT: retq 1825 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 4) 1826 ret <4 x double> %res 1827} 1828 1829define <8 x float> @test_mm256_round_ps(<8 x float> %a0) nounwind { 1830; X32-LABEL: test_mm256_round_ps: 1831; X32: # BB#0: 1832; X32-NEXT: vroundps $4, %ymm0, %ymm0 1833; X32-NEXT: retl 1834; 1835; X64-LABEL: test_mm256_round_ps: 1836; X64: # BB#0: 1837; X64-NEXT: vroundps $4, %ymm0, %ymm0 1838; X64-NEXT: retq 1839 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 4) 1840 ret <8 x float> %res 1841} 1842 1843define <8 x float> @test_mm256_rsqrt_ps(<8 x float> %a0) nounwind { 1844; X32-LABEL: test_mm256_rsqrt_ps: 1845; X32: # BB#0: 1846; X32-NEXT: vrsqrtps %ymm0, %ymm0 1847; X32-NEXT: retl 1848; 1849; X64-LABEL: test_mm256_rsqrt_ps: 1850; X64: # BB#0: 1851; X64-NEXT: vrsqrtps %ymm0, %ymm0 1852; X64-NEXT: retq 1853 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) 1854 ret <8 x float> %res 1855} 1856declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone 1857 1858define <4 x i64> @test_mm256_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { 1859; X32-LABEL: test_mm256_set_epi8: 1860; X32: # BB#0: 1861; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1862; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1863; X32-NEXT: vmovd %ecx, %xmm0 1864; X32-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 1865; X32-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax 1866; X32-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 1867; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1868; X32-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 1869; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1870; X32-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 1871; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1872; X32-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 1873; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1874; X32-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 1875; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1876; X32-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 1877; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1878; X32-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1879; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1880; X32-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 1881; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1882; X32-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1883; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1884; X32-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 1885; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1886; X32-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1887; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1888; X32-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 1889; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1890; X32-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1891; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1892; X32-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1893; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1894; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 1895; X32-NEXT: vmovd %ecx, %xmm1 1896; X32-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1897; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1898; X32-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 1899; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1900; X32-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 1901; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1902; X32-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 1903; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1904; X32-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 1905; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1906; X32-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 1907; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1908; X32-NEXT: vpinsrb $7, 
%eax, %xmm1, %xmm1 1909; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1910; X32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 1911; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1912; X32-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 1913; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1914; X32-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 1915; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1916; X32-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 1917; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1918; X32-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 1919; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1920; X32-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 1921; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1922; X32-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 1923; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1924; X32-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1925; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1926; X32-NEXT: retl 1927; 1928; X64-LABEL: test_mm256_set_epi8: 1929; X64: # BB#0: 1930; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 1931; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1932; X64-NEXT: vmovd %eax, %xmm0 1933; X64-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 1934; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1935; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 1936; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1937; X64-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 1938; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1939; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 1940; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1941; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 1942; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1943; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 1944; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1945; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 1946; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1947; X64-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1948; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1949; X64-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 1950; X64-NEXT: movzbl %r9b, %eax 1951; X64-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1952; X64-NEXT: movzbl %r8b, %eax 1953; X64-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 1954; 
X64-NEXT: movzbl %cl, %eax 1955; X64-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1956; X64-NEXT: movzbl %dl, %eax 1957; X64-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 1958; X64-NEXT: movzbl %sil, %eax 1959; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1960; X64-NEXT: movzbl %dil, %eax 1961; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1962; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1963; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx 1964; X64-NEXT: vmovd %ecx, %xmm1 1965; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1966; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1967; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 1968; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1969; X64-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 1970; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1971; X64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 1972; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1973; X64-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 1974; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1975; X64-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 1976; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1977; X64-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 1978; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1979; X64-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 1980; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1981; X64-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 1982; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1983; X64-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 1984; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1985; X64-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 1986; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1987; X64-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 1988; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1989; X64-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 1990; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1991; X64-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 1992; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 1993; X64-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1994; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1995; X64-NEXT: retq 1996 %res0 = insertelement <32 x i8> undef, i8 %a31, i32 0 1997 %res1 = insertelement <32 x i8> %res0, i8 %a30, i32 1 1998 
%res2 = insertelement <32 x i8> %res1, i8 %a29, i32 2 1999 %res3 = insertelement <32 x i8> %res2, i8 %a28, i32 3 2000 %res4 = insertelement <32 x i8> %res3, i8 %a27, i32 4 2001 %res5 = insertelement <32 x i8> %res4, i8 %a26, i32 5 2002 %res6 = insertelement <32 x i8> %res5, i8 %a25, i32 6 2003 %res7 = insertelement <32 x i8> %res6, i8 %a24, i32 7 2004 %res8 = insertelement <32 x i8> %res7, i8 %a23, i32 8 2005 %res9 = insertelement <32 x i8> %res8, i8 %a22, i32 9 2006 %res10 = insertelement <32 x i8> %res9, i8 %a21, i32 10 2007 %res11 = insertelement <32 x i8> %res10, i8 %a20, i32 11 2008 %res12 = insertelement <32 x i8> %res11, i8 %a19, i32 12 2009 %res13 = insertelement <32 x i8> %res12, i8 %a18, i32 13 2010 %res14 = insertelement <32 x i8> %res13, i8 %a17, i32 14 2011 %res15 = insertelement <32 x i8> %res14, i8 %a16, i32 15 2012 %res16 = insertelement <32 x i8> %res15, i8 %a15, i32 16 2013 %res17 = insertelement <32 x i8> %res16, i8 %a14, i32 17 2014 %res18 = insertelement <32 x i8> %res17, i8 %a13, i32 18 2015 %res19 = insertelement <32 x i8> %res18, i8 %a12, i32 19 2016 %res20 = insertelement <32 x i8> %res19, i8 %a11, i32 20 2017 %res21 = insertelement <32 x i8> %res20, i8 %a10, i32 21 2018 %res22 = insertelement <32 x i8> %res21, i8 %a9 , i32 22 2019 %res23 = insertelement <32 x i8> %res22, i8 %a8 , i32 23 2020 %res24 = insertelement <32 x i8> %res23, i8 %a7 , i32 24 2021 %res25 = insertelement <32 x i8> %res24, i8 %a6 , i32 25 2022 %res26 = insertelement <32 x i8> %res25, i8 %a5 , i32 26 2023 %res27 = insertelement <32 x i8> %res26, i8 %a4 , i32 27 2024 %res28 = insertelement <32 x i8> %res27, i8 %a3 , i32 28 2025 %res29 = insertelement <32 x i8> %res28, i8 %a2 , i32 29 2026 %res30 = insertelement <32 x i8> %res29, i8 %a1 , i32 30 2027 %res31 = insertelement <32 x i8> %res30, i8 %a0 , i32 31 2028 %res = bitcast <32 x i8> %res31 to <4 x i64> 2029 ret <4 x i64> %res 2030} 2031 2032define <4 x i64> @test_mm256_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 
%a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { 2033; X32-LABEL: test_mm256_set_epi16: 2034; X32: # BB#0: 2035; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2036; X32-NEXT: vmovd %eax, %xmm0 2037; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2038; X32-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 2039; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2040; X32-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 2041; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2042; X32-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 2043; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2044; X32-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 2045; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2046; X32-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 2047; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2048; X32-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 2049; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2050; X32-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 2051; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2052; X32-NEXT: vmovd %eax, %xmm1 2053; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2054; X32-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 2055; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2056; X32-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 2057; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2058; X32-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1 2059; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2060; X32-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 2061; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2062; X32-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1 2063; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2064; X32-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 2065; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2066; X32-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 2067; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2068; X32-NEXT: retl 2069; 2070; X64-LABEL: test_mm256_set_epi16: 2071; X64: # BB#0: 2072; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2073; X64-NEXT: vmovd %eax, %xmm0 2074; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2075; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 2076; X64-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 2077; X64-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 2078; 
X64-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 2079; X64-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 2080; X64-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 2081; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 2082; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2083; X64-NEXT: vmovd %eax, %xmm1 2084; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2085; X64-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 2086; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2087; X64-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 2088; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2089; X64-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1 2090; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2091; X64-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 2092; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2093; X64-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1 2094; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2095; X64-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 2096; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2097; X64-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 2098; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2099; X64-NEXT: retq 2100 %res0 = insertelement <16 x i16> undef, i16 %a15, i32 0 2101 %res1 = insertelement <16 x i16> %res0, i16 %a14, i32 1 2102 %res2 = insertelement <16 x i16> %res1, i16 %a13, i32 2 2103 %res3 = insertelement <16 x i16> %res2, i16 %a12, i32 3 2104 %res4 = insertelement <16 x i16> %res3, i16 %a11, i32 4 2105 %res5 = insertelement <16 x i16> %res4, i16 %a10, i32 5 2106 %res6 = insertelement <16 x i16> %res5, i16 %a9 , i32 6 2107 %res7 = insertelement <16 x i16> %res6, i16 %a8 , i32 7 2108 %res8 = insertelement <16 x i16> %res7, i16 %a7 , i32 8 2109 %res9 = insertelement <16 x i16> %res8, i16 %a6 , i32 9 2110 %res10 = insertelement <16 x i16> %res9, i16 %a5 , i32 10 2111 %res11 = insertelement <16 x i16> %res10, i16 %a4 , i32 11 2112 %res12 = insertelement <16 x i16> %res11, i16 %a3 , i32 12 2113 %res13 = insertelement <16 x i16> %res12, i16 %a2 , i32 13 2114 %res14 = insertelement <16 x i16> %res13, i16 %a1 , i32 14 2115 %res15 = insertelement <16 x i16> %res14, i16 %a0 , i32 15 2116 %res = bitcast <16 x i16> %res15 to <4 
x i64> 2117 ret <4 x i64> %res 2118} 2119 2120define <4 x i64> @test_mm256_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { 2121; X32-LABEL: test_mm256_set_epi32: 2122; X32: # BB#0: 2123; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2124; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 2125; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 2126; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 2127; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2128; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 2129; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 2130; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 2131; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2132; X32-NEXT: retl 2133; 2134; X64-LABEL: test_mm256_set_epi32: 2135; X64: # BB#0: 2136; X64-NEXT: vmovd %ecx, %xmm0 2137; X64-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 2138; X64-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 2139; X64-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 2140; X64-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2141; X64-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1 2142; X64-NEXT: vpinsrd $2, %r9d, %xmm1, %xmm1 2143; X64-NEXT: vpinsrd $3, %r8d, %xmm1, %xmm1 2144; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2145; X64-NEXT: retq 2146 %res0 = insertelement <8 x i32> undef, i32 %a7, i32 0 2147 %res1 = insertelement <8 x i32> %res0, i32 %a6, i32 1 2148 %res2 = insertelement <8 x i32> %res1, i32 %a5, i32 2 2149 %res3 = insertelement <8 x i32> %res2, i32 %a4, i32 3 2150 %res4 = insertelement <8 x i32> %res3, i32 %a3, i32 4 2151 %res5 = insertelement <8 x i32> %res4, i32 %a2, i32 5 2152 %res6 = insertelement <8 x i32> %res5, i32 %a1, i32 6 2153 %res7 = insertelement <8 x i32> %res6, i32 %a0, i32 7 2154 %res = bitcast <8 x i32> %res7 to <4 x i64> 2155 ret <4 x i64> %res 2156} 2157 2158define <4 x i64> @test_mm256_set_epi64x(i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { 2159; X32-LABEL: test_mm256_set_epi64x: 2160; X32: # 
BB#0: 2161; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2162; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 2163; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 2164; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 2165; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2166; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 2167; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 2168; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 2169; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2170; X32-NEXT: retl 2171; 2172; X64-LABEL: test_mm256_set_epi64x: 2173; X64: # BB#0: 2174; X64-NEXT: vmovq %rdi, %xmm0 2175; X64-NEXT: vmovq %rsi, %xmm1 2176; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2177; X64-NEXT: vmovq %rdx, %xmm1 2178; X64-NEXT: vmovq %rcx, %xmm2 2179; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 2180; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2181; X64-NEXT: retq 2182 %res0 = insertelement <4 x i64> undef, i64 %a3, i32 0 2183 %res1 = insertelement <4 x i64> %res0, i64 %a2, i32 1 2184 %res2 = insertelement <4 x i64> %res1, i64 %a1, i32 2 2185 %res3 = insertelement <4 x i64> %res2, i64 %a0, i32 3 2186 ret <4 x i64> %res3 2187} 2188 2189define <8 x float> @test_mm256_set_m128(<4 x float> %a0, <4 x float> %a1) nounwind { 2190; X32-LABEL: test_mm256_set_m128: 2191; X32: # BB#0: 2192; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 2193; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2194; X32-NEXT: retl 2195; 2196; X64-LABEL: test_mm256_set_m128: 2197; X64: # BB#0: 2198; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 2199; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2200; X64-NEXT: retq 2201 %res = shufflevector <4 x float> %a1, <4 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2202 ret <8 x float> %res 2203} 2204 2205define <4 x double> @test_mm256_set_m128d(<2 x double> %a0, <2 x double> %a1) nounwind { 2206; X32-LABEL: test_mm256_set_m128d: 2207; X32: # 
BB#0: 2208; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 2209; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2210; X32-NEXT: retl 2211; 2212; X64-LABEL: test_mm256_set_m128d: 2213; X64: # BB#0: 2214; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 2215; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2216; X64-NEXT: retq 2217 %arg0 = bitcast <2 x double> %a0 to <4 x float> 2218 %arg1 = bitcast <2 x double> %a1 to <4 x float> 2219 %res = shufflevector <4 x float> %arg1, <4 x float> %arg0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2220 %bc = bitcast <8 x float> %res to <4 x double> 2221 ret <4 x double> %bc 2222} 2223 2224define <4 x i64> @test_mm256_set_m128i(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2225; X32-LABEL: test_mm256_set_m128i: 2226; X32: # BB#0: 2227; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 2228; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2229; X32-NEXT: retl 2230; 2231; X64-LABEL: test_mm256_set_m128i: 2232; X64: # BB#0: 2233; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def> 2234; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2235; X64-NEXT: retq 2236 %arg0 = bitcast <2 x i64> %a0 to <4 x float> 2237 %arg1 = bitcast <2 x i64> %a1 to <4 x float> 2238 %res = shufflevector <4 x float> %arg1, <4 x float> %arg0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2239 %bc = bitcast <8 x float> %res to <4 x i64> 2240 ret <4 x i64> %bc 2241} 2242 2243define <4 x double> @test_mm256_set_pd(double %a0, double %a1, double %a2, double %a3) nounwind { 2244; X32-LABEL: test_mm256_set_pd: 2245; X32: # BB#0: 2246; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2247; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 2248; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero 2249; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 2250; X32-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2251; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2252; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2253; X32-NEXT: retl 2254; 
2255; X64-LABEL: test_mm256_set_pd: 2256; X64: # BB#0: 2257; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2258; X64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0] 2259; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2260; X64-NEXT: retq 2261 %res0 = insertelement <4 x double> undef, double %a3, i32 0 2262 %res1 = insertelement <4 x double> %res0, double %a2, i32 1 2263 %res2 = insertelement <4 x double> %res1, double %a1, i32 2 2264 %res3 = insertelement <4 x double> %res2, double %a0, i32 3 2265 ret <4 x double> %res3 2266} 2267 2268define <8 x float> @test_mm256_set_ps(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { 2269; X32-LABEL: test_mm256_set_ps: 2270; X32: # BB#0: 2271; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2272; X32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 2273; X32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 2274; X32-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 2275; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero 2276; X32-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero 2277; X32-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero 2278; X32-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero 2279; X32-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] 2280; X32-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3] 2281; X32-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0] 2282; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2283; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2284; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0] 2285; X32-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 2286; X32-NEXT: retl 2287; 2288; X64-LABEL: test_mm256_set_ps: 2289; X64: # BB#0: 2290; X64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2291; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2292; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 2293; X64-NEXT: vinsertps {{.*#+}} xmm1 = 
xmm7[0],xmm6[0],xmm7[2,3] 2294; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm5[0],xmm1[3] 2295; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0] 2296; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2297; X64-NEXT: retq 2298 %res0 = insertelement <8 x float> undef, float %a7, i32 0 2299 %res1 = insertelement <8 x float> %res0, float %a6, i32 1 2300 %res2 = insertelement <8 x float> %res1, float %a5, i32 2 2301 %res3 = insertelement <8 x float> %res2, float %a4, i32 3 2302 %res4 = insertelement <8 x float> %res3, float %a3, i32 4 2303 %res5 = insertelement <8 x float> %res4, float %a2, i32 5 2304 %res6 = insertelement <8 x float> %res5, float %a1, i32 6 2305 %res7 = insertelement <8 x float> %res6, float %a0, i32 7 2306 ret <8 x float> %res7 2307} 2308 2309define <4 x i64> @test_mm256_set1_epi8(i8 %a0) nounwind { 2310; X32-LABEL: test_mm256_set1_epi8: 2311; X32: # BB#0: 2312; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2313; X32-NEXT: vmovd %eax, %xmm0 2314; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1 2315; X32-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2316; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2317; X32-NEXT: retl 2318; 2319; X64-LABEL: test_mm256_set1_epi8: 2320; X64: # BB#0: 2321; X64-NEXT: movzbl %dil, %eax 2322; X64-NEXT: vmovd %eax, %xmm0 2323; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 2324; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2325; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2326; X64-NEXT: retq 2327 %res0 = insertelement <32 x i8> undef, i8 %a0, i32 0 2328 %res1 = insertelement <32 x i8> %res0, i8 %a0, i32 1 2329 %res2 = insertelement <32 x i8> %res1, i8 %a0, i32 2 2330 %res3 = insertelement <32 x i8> %res2, i8 %a0, i32 3 2331 %res4 = insertelement <32 x i8> %res3, i8 %a0, i32 4 2332 %res5 = insertelement <32 x i8> %res4, i8 %a0, i32 5 2333 %res6 = insertelement <32 x i8> %res5, i8 %a0, i32 6 2334 %res7 = insertelement <32 x i8> %res6, i8 %a0, i32 7 2335 %res8 = insertelement <32 x i8> %res7, i8 %a0, i32 8 2336 %res9 = insertelement <32 x i8> %res8, i8 %a0, i32 
9 2337 %res10 = insertelement <32 x i8> %res9, i8 %a0, i32 10 2338 %res11 = insertelement <32 x i8> %res10, i8 %a0, i32 11 2339 %res12 = insertelement <32 x i8> %res11, i8 %a0, i32 12 2340 %res13 = insertelement <32 x i8> %res12, i8 %a0, i32 13 2341 %res14 = insertelement <32 x i8> %res13, i8 %a0, i32 14 2342 %res15 = insertelement <32 x i8> %res14, i8 %a0, i32 15 2343 %res16 = insertelement <32 x i8> %res15, i8 %a0, i32 16 2344 %res17 = insertelement <32 x i8> %res16, i8 %a0, i32 17 2345 %res18 = insertelement <32 x i8> %res17, i8 %a0, i32 18 2346 %res19 = insertelement <32 x i8> %res18, i8 %a0, i32 19 2347 %res20 = insertelement <32 x i8> %res19, i8 %a0, i32 20 2348 %res21 = insertelement <32 x i8> %res20, i8 %a0, i32 21 2349 %res22 = insertelement <32 x i8> %res21, i8 %a0, i32 22 2350 %res23 = insertelement <32 x i8> %res22, i8 %a0, i32 23 2351 %res24 = insertelement <32 x i8> %res23, i8 %a0, i32 24 2352 %res25 = insertelement <32 x i8> %res24, i8 %a0, i32 25 2353 %res26 = insertelement <32 x i8> %res25, i8 %a0, i32 26 2354 %res27 = insertelement <32 x i8> %res26, i8 %a0, i32 27 2355 %res28 = insertelement <32 x i8> %res27, i8 %a0, i32 28 2356 %res29 = insertelement <32 x i8> %res28, i8 %a0, i32 29 2357 %res30 = insertelement <32 x i8> %res29, i8 %a0, i32 30 2358 %res31 = insertelement <32 x i8> %res30, i8 %a0, i32 31 2359 %res = bitcast <32 x i8> %res31 to <4 x i64> 2360 ret <4 x i64> %res 2361} 2362 2363define <4 x i64> @test_mm256_set1_epi16(i16 %a0) nounwind { 2364; X32-LABEL: test_mm256_set1_epi16: 2365; X32: # BB#0: 2366; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2367; X32-NEXT: vmovd %eax, %xmm0 2368; X32-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2369; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2370; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2371; X32-NEXT: retl 2372; 2373; X64-LABEL: test_mm256_set1_epi16: 2374; X64: # BB#0: 2375; X64-NEXT: vmovd %edi, %xmm0 2376; X64-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2377; X64-NEXT: 
vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2378; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2379; X64-NEXT: retq 2380 %res0 = insertelement <16 x i16> undef, i16 %a0, i32 0 2381 %res1 = insertelement <16 x i16> %res0, i16 %a0, i32 1 2382 %res2 = insertelement <16 x i16> %res1, i16 %a0, i32 2 2383 %res3 = insertelement <16 x i16> %res2, i16 %a0, i32 3 2384 %res4 = insertelement <16 x i16> %res3, i16 %a0, i32 4 2385 %res5 = insertelement <16 x i16> %res4, i16 %a0, i32 5 2386 %res6 = insertelement <16 x i16> %res5, i16 %a0, i32 6 2387 %res7 = insertelement <16 x i16> %res6, i16 %a0, i32 7 2388 %res8 = insertelement <16 x i16> %res7, i16 %a0, i32 8 2389 %res9 = insertelement <16 x i16> %res8, i16 %a0, i32 9 2390 %res10 = insertelement <16 x i16> %res9, i16 %a0, i32 10 2391 %res11 = insertelement <16 x i16> %res10, i16 %a0, i32 11 2392 %res12 = insertelement <16 x i16> %res11, i16 %a0, i32 12 2393 %res13 = insertelement <16 x i16> %res12, i16 %a0, i32 13 2394 %res14 = insertelement <16 x i16> %res13, i16 %a0, i32 14 2395 %res15 = insertelement <16 x i16> %res14, i16 %a0, i32 15 2396 %res = bitcast <16 x i16> %res15 to <4 x i64> 2397 ret <4 x i64> %res 2398} 2399 2400define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind { 2401; X32-LABEL: test_mm256_set1_epi32: 2402; X32: # BB#0: 2403; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2404; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2405; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2406; X32-NEXT: retl 2407; 2408; X64-LABEL: test_mm256_set1_epi32: 2409; X64: # BB#0: 2410; X64-NEXT: vmovd %edi, %xmm0 2411; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2412; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2413; X64-NEXT: retq 2414 %res0 = insertelement <8 x i32> undef, i32 %a0, i32 0 2415 %res1 = insertelement <8 x i32> %res0, i32 %a0, i32 1 2416 %res2 = insertelement <8 x i32> %res1, i32 %a0, i32 2 2417 %res3 = insertelement <8 x i32> %res2, i32 %a0, i32 3 2418 %res4 = insertelement <8 x i32> %res3, i32 %a0, 
i32 4 2419 %res5 = insertelement <8 x i32> %res4, i32 %a0, i32 5 2420 %res6 = insertelement <8 x i32> %res5, i32 %a0, i32 6 2421 %res7 = insertelement <8 x i32> %res6, i32 %a0, i32 7 2422 %res = bitcast <8 x i32> %res7 to <4 x i64> 2423 ret <4 x i64> %res 2424} 2425 2426define <4 x i64> @test_mm256_set1_epi64x(i64 %a0) nounwind { 2427; X32-LABEL: test_mm256_set1_epi64x: 2428; X32: # BB#0: 2429; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 2430; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2431; X32-NEXT: vmovd %ecx, %xmm0 2432; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 2433; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2434; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 2435; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2436; X32-NEXT: retl 2437; 2438; X64-LABEL: test_mm256_set1_epi64x: 2439; X64: # BB#0: 2440; X64-NEXT: vmovq %rdi, %xmm0 2441; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2442; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2443; X64-NEXT: retq 2444 %res0 = insertelement <4 x i64> undef, i64 %a0, i32 0 2445 %res1 = insertelement <4 x i64> %res0, i64 %a0, i32 1 2446 %res2 = insertelement <4 x i64> %res1, i64 %a0, i32 2 2447 %res3 = insertelement <4 x i64> %res2, i64 %a0, i32 3 2448 ret <4 x i64> %res3 2449} 2450 2451define <4 x double> @test_mm256_set1_pd(double %a0) nounwind { 2452; X32-LABEL: test_mm256_set1_pd: 2453; X32: # BB#0: 2454; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2455; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 2456; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2457; X32-NEXT: retl 2458; 2459; X64-LABEL: test_mm256_set1_pd: 2460; X64: # BB#0: 2461; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 2462; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2463; X64-NEXT: retq 2464 %res0 = insertelement <4 x double> undef, double %a0, i32 0 2465 %res1 = insertelement <4 x double> %res0, double %a0, i32 1 2466 %res2 = insertelement <4 x double> %res1, double %a0, i32 2 2467 %res3 = insertelement <4 x double> %res2, double %a0, i32 3 2468 ret <4 x double> %res3 
2469} 2470 2471define <8 x float> @test_mm256_set1_ps(float %a0) nounwind { 2472; X32-LABEL: test_mm256_set1_ps: 2473; X32: # BB#0: 2474; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2475; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 2476; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2477; X32-NEXT: retl 2478; 2479; X64-LABEL: test_mm256_set1_ps: 2480; X64: # BB#0: 2481; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 2482; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2483; X64-NEXT: retq 2484 %res0 = insertelement <8 x float> undef, float %a0, i32 0 2485 %res1 = insertelement <8 x float> %res0, float %a0, i32 1 2486 %res2 = insertelement <8 x float> %res1, float %a0, i32 2 2487 %res3 = insertelement <8 x float> %res2, float %a0, i32 3 2488 %res4 = insertelement <8 x float> %res3, float %a0, i32 4 2489 %res5 = insertelement <8 x float> %res4, float %a0, i32 5 2490 %res6 = insertelement <8 x float> %res5, float %a0, i32 6 2491 %res7 = insertelement <8 x float> %res6, float %a0, i32 7 2492 ret <8 x float> %res7 2493} 2494 2495define <4 x i64> @test_mm256_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { 2496; X32-LABEL: test_mm256_setr_epi8: 2497; X32: # BB#0: 2498; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2499; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 2500; X32-NEXT: vmovd %ecx, %xmm0 2501; X32-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 2502; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2503; X32-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 2504; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2505; X32-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 2506; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2507; X32-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 2508; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2509; X32-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 2510; 
X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2511; X32-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 2512; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2513; X32-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 2514; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2515; X32-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 2516; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2517; X32-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 2518; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2519; X32-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 2520; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2521; X32-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 2522; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2523; X32-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 2524; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2525; X32-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 2526; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2527; X32-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 2528; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2529; X32-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 2530; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2531; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 2532; X32-NEXT: vmovd %ecx, %xmm1 2533; X32-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 2534; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2535; X32-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 2536; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2537; X32-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 2538; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2539; X32-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 2540; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2541; X32-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 2542; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2543; X32-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 2544; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2545; X32-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 2546; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2547; X32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 2548; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2549; X32-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 2550; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2551; X32-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 2552; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2553; 
X32-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 2554; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2555; X32-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 2556; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2557; X32-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 2558; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2559; X32-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 2560; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2561; X32-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 2562; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2563; X32-NEXT: retl 2564; 2565; X64-LABEL: test_mm256_setr_epi8: 2566; X64: # BB#0: 2567; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 2568; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2569; X64-NEXT: vmovd %eax, %xmm0 2570; X64-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 2571; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2572; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 2573; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2574; X64-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 2575; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2576; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 2577; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2578; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 2579; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2580; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 2581; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2582; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 2583; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2584; X64-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 2585; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2586; X64-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 2587; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2588; X64-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 2589; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2590; X64-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 2591; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2592; X64-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 2593; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2594; X64-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 2595; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2596; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 2597; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2598; 
X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 2599; X64-NEXT: movzbl %sil, %eax 2600; X64-NEXT: movzbl %dil, %esi 2601; X64-NEXT: vmovd %esi, %xmm1 2602; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 2603; X64-NEXT: movzbl %dl, %eax 2604; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 2605; X64-NEXT: movzbl %cl, %eax 2606; X64-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 2607; X64-NEXT: movzbl %r8b, %eax 2608; X64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 2609; X64-NEXT: movzbl %r9b, %eax 2610; X64-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 2611; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2612; X64-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 2613; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2614; X64-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 2615; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2616; X64-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 2617; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2618; X64-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 2619; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2620; X64-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 2621; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2622; X64-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 2623; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2624; X64-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 2625; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2626; X64-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 2627; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2628; X64-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 2629; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2630; X64-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 2631; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2632; X64-NEXT: retq 2633 %res0 = insertelement <32 x i8> undef, i8 %a0 , i32 0 2634 %res1 = insertelement <32 x i8> %res0, i8 %a1 , i32 1 2635 %res2 = insertelement <32 x i8> %res1, i8 %a2 , i32 2 2636 %res3 = insertelement <32 x i8> %res2, i8 %a3 , i32 3 2637 %res4 = insertelement <32 x i8> %res3, i8 %a4 , i32 4 2638 %res5 = insertelement <32 x i8> %res4, i8 %a5 , i32 5 2639 %res6 = insertelement <32 x i8> %res5, i8 %a6 , i32 6 2640 %res7 = insertelement <32 x i8> %res6, i8 %a7 , i32 7 2641 
%res8 = insertelement <32 x i8> %res7, i8 %a8 , i32 8 2642 %res9 = insertelement <32 x i8> %res8, i8 %a9 , i32 9 2643 %res10 = insertelement <32 x i8> %res9, i8 %a10, i32 10 2644 %res11 = insertelement <32 x i8> %res10, i8 %a11, i32 11 2645 %res12 = insertelement <32 x i8> %res11, i8 %a12, i32 12 2646 %res13 = insertelement <32 x i8> %res12, i8 %a13, i32 13 2647 %res14 = insertelement <32 x i8> %res13, i8 %a14, i32 14 2648 %res15 = insertelement <32 x i8> %res14, i8 %a15, i32 15 2649 %res16 = insertelement <32 x i8> %res15, i8 %a16, i32 16 2650 %res17 = insertelement <32 x i8> %res16, i8 %a17, i32 17 2651 %res18 = insertelement <32 x i8> %res17, i8 %a18, i32 18 2652 %res19 = insertelement <32 x i8> %res18, i8 %a19, i32 19 2653 %res20 = insertelement <32 x i8> %res19, i8 %a20, i32 20 2654 %res21 = insertelement <32 x i8> %res20, i8 %a21, i32 21 2655 %res22 = insertelement <32 x i8> %res21, i8 %a22, i32 22 2656 %res23 = insertelement <32 x i8> %res22, i8 %a23, i32 23 2657 %res24 = insertelement <32 x i8> %res23, i8 %a24, i32 24 2658 %res25 = insertelement <32 x i8> %res24, i8 %a25, i32 25 2659 %res26 = insertelement <32 x i8> %res25, i8 %a26, i32 26 2660 %res27 = insertelement <32 x i8> %res26, i8 %a27, i32 27 2661 %res28 = insertelement <32 x i8> %res27, i8 %a28, i32 28 2662 %res29 = insertelement <32 x i8> %res28, i8 %a29, i32 29 2663 %res30 = insertelement <32 x i8> %res29, i8 %a30, i32 30 2664 %res31 = insertelement <32 x i8> %res30, i8 %a31, i32 31 2665 %res = bitcast <32 x i8> %res31 to <4 x i64> 2666 ret <4 x i64> %res 2667} 2668 2669define <4 x i64> @test_mm256_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { 2670; X32-LABEL: test_mm256_setr_epi16: 2671; X32: # BB#0: 2672; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2673; X32-NEXT: vmovd %eax, %xmm0 2674; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2675; X32-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 2676; 
X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2677; X32-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 2678; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2679; X32-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 2680; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2681; X32-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 2682; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2683; X32-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 2684; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2685; X32-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 2686; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2687; X32-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 2688; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2689; X32-NEXT: vmovd %eax, %xmm1 2690; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2691; X32-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 2692; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2693; X32-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 2694; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2695; X32-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1 2696; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2697; X32-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 2698; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2699; X32-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1 2700; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2701; X32-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 2702; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2703; X32-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 2704; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2705; X32-NEXT: retl 2706; 2707; X64-LABEL: test_mm256_setr_epi16: 2708; X64: # BB#0: 2709; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2710; X64-NEXT: vmovd %eax, %xmm0 2711; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2712; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 2713; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2714; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 2715; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2716; X64-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 2717; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2718; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 2719; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2720; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 2721; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2722; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 
2723; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2724; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 2725; X64-NEXT: vmovd %edi, %xmm1 2726; X64-NEXT: vpinsrw $1, %esi, %xmm1, %xmm1 2727; X64-NEXT: vpinsrw $2, %edx, %xmm1, %xmm1 2728; X64-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 2729; X64-NEXT: vpinsrw $4, %r8d, %xmm1, %xmm1 2730; X64-NEXT: vpinsrw $5, %r9d, %xmm1, %xmm1 2731; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2732; X64-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 2733; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2734; X64-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 2735; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2736; X64-NEXT: retq 2737 %res0 = insertelement <16 x i16> undef, i16 %a0 , i32 0 2738 %res1 = insertelement <16 x i16> %res0, i16 %a1 , i32 1 2739 %res2 = insertelement <16 x i16> %res1, i16 %a2 , i32 2 2740 %res3 = insertelement <16 x i16> %res2, i16 %a3 , i32 3 2741 %res4 = insertelement <16 x i16> %res3, i16 %a4 , i32 4 2742 %res5 = insertelement <16 x i16> %res4, i16 %a5 , i32 5 2743 %res6 = insertelement <16 x i16> %res5, i16 %a6 , i32 6 2744 %res7 = insertelement <16 x i16> %res6, i16 %a7 , i32 7 2745 %res8 = insertelement <16 x i16> %res7, i16 %a8 , i32 8 2746 %res9 = insertelement <16 x i16> %res8, i16 %a9 , i32 9 2747 %res10 = insertelement <16 x i16> %res9, i16 %a10, i32 10 2748 %res11 = insertelement <16 x i16> %res10, i16 %a11, i32 11 2749 %res12 = insertelement <16 x i16> %res11, i16 %a12, i32 12 2750 %res13 = insertelement <16 x i16> %res12, i16 %a13, i32 13 2751 %res14 = insertelement <16 x i16> %res13, i16 %a14, i32 14 2752 %res15 = insertelement <16 x i16> %res14, i16 %a15, i32 15 2753 %res = bitcast <16 x i16> %res15 to <4 x i64> 2754 ret <4 x i64> %res 2755} 2756 2757define <4 x i64> @test_mm256_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { 2758; X32-LABEL: test_mm256_setr_epi32: 2759; X32: # BB#0: 2760; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2761; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 
2762; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 2763; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 2764; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2765; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 2766; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 2767; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 2768; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2769; X32-NEXT: retl 2770; 2771; X64-LABEL: test_mm256_setr_epi32: 2772; X64: # BB#0: 2773; X64-NEXT: vmovd %r8d, %xmm0 2774; X64-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0 2775; X64-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0 2776; X64-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0 2777; X64-NEXT: vmovd %edi, %xmm1 2778; X64-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1 2779; X64-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 2780; X64-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1 2781; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2782; X64-NEXT: retq 2783 %res0 = insertelement <8 x i32> undef, i32 %a0, i32 0 2784 %res1 = insertelement <8 x i32> %res0, i32 %a1, i32 1 2785 %res2 = insertelement <8 x i32> %res1, i32 %a2, i32 2 2786 %res3 = insertelement <8 x i32> %res2, i32 %a3, i32 3 2787 %res4 = insertelement <8 x i32> %res3, i32 %a4, i32 4 2788 %res5 = insertelement <8 x i32> %res4, i32 %a5, i32 5 2789 %res6 = insertelement <8 x i32> %res5, i32 %a6, i32 6 2790 %res7 = insertelement <8 x i32> %res6, i32 %a7, i32 7 2791 %res = bitcast <8 x i32> %res7 to <4 x i64> 2792 ret <4 x i64> %res 2793} 2794 2795define <4 x i64> @test_mm256_setr_epi64x(i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { 2796; X32-LABEL: test_mm256_setr_epi64x: 2797; X32: # BB#0: 2798; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2799; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 2800; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 2801; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 2802; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2803; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, 
%xmm1 2804; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 2805; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 2806; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2807; X32-NEXT: retl 2808; 2809; X64-LABEL: test_mm256_setr_epi64x: 2810; X64: # BB#0: 2811; X64-NEXT: vmovq %rcx, %xmm0 2812; X64-NEXT: vmovq %rdx, %xmm1 2813; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2814; X64-NEXT: vmovq %rsi, %xmm1 2815; X64-NEXT: vmovq %rdi, %xmm2 2816; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 2817; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2818; X64-NEXT: retq 2819 %res0 = insertelement <4 x i64> undef, i64 %a0, i32 0 2820 %res1 = insertelement <4 x i64> %res0, i64 %a1, i32 1 2821 %res2 = insertelement <4 x i64> %res1, i64 %a2, i32 2 2822 %res3 = insertelement <4 x i64> %res2, i64 %a3, i32 3 2823 ret <4 x i64> %res3 2824} 2825 2826define <8 x float> @test_mm256_setr_m128(<4 x float> %a0, <4 x float> %a1) nounwind { 2827; X32-LABEL: test_mm256_setr_m128: 2828; X32: # BB#0: 2829; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 2830; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2831; X32-NEXT: retl 2832; 2833; X64-LABEL: test_mm256_setr_m128: 2834; X64: # BB#0: 2835; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 2836; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2837; X64-NEXT: retq 2838 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2839 ret <8 x float> %res 2840} 2841 2842define <4 x double> @test_mm256_setr_m128d(<2 x double> %a0, <2 x double> %a1) nounwind { 2843; X32-LABEL: test_mm256_setr_m128d: 2844; X32: # BB#0: 2845; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 2846; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2847; X32-NEXT: retl 2848; 2849; X64-LABEL: test_mm256_setr_m128d: 2850; X64: # BB#0: 2851; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 2852; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2853; X64-NEXT: retq 2854 %arg0 = 
bitcast <2 x double> %a0 to <4 x float> 2855 %arg1 = bitcast <2 x double> %a1 to <4 x float> 2856 %res = shufflevector <4 x float> %arg0, <4 x float> %arg1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2857 %bc = bitcast <8 x float> %res to <4 x double> 2858 ret <4 x double> %bc 2859} 2860 2861define <4 x i64> @test_mm256_setr_m128i(<2 x i64> %a0, <2 x i64> %a1) nounwind { 2862; X32-LABEL: test_mm256_setr_m128i: 2863; X32: # BB#0: 2864; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 2865; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2866; X32-NEXT: retl 2867; 2868; X64-LABEL: test_mm256_setr_m128i: 2869; X64: # BB#0: 2870; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> 2871; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2872; X64-NEXT: retq 2873 %arg0 = bitcast <2 x i64> %a0 to <4 x float> 2874 %arg1 = bitcast <2 x i64> %a1 to <4 x float> 2875 %res = shufflevector <4 x float> %arg0, <4 x float> %arg1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2876 %bc = bitcast <8 x float> %res to <4 x i64> 2877 ret <4 x i64> %bc 2878} 2879 2880define <4 x double> @test_mm256_setr_pd(double %a0, double %a1, double %a2, double %a3) nounwind { 2881; X32-LABEL: test_mm256_setr_pd: 2882; X32: # BB#0: 2883; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2884; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 2885; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero 2886; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 2887; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2888; X32-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0] 2889; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2890; X32-NEXT: retl 2891; 2892; X64-LABEL: test_mm256_setr_pd: 2893; X64: # BB#0: 2894; X64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2895; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2896; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2897; X64-NEXT: retq 2898 %res0 = insertelement <4 x double> undef, double %a0, i32 0 2899 %res1 = insertelement <4 x 
double> %res0, double %a1, i32 1 2900 %res2 = insertelement <4 x double> %res1, double %a2, i32 2 2901 %res3 = insertelement <4 x double> %res2, double %a3, i32 3 2902 ret <4 x double> %res3 2903} 2904 2905define <8 x float> @test_mm256_setr_ps(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { 2906; X32-LABEL: test_mm256_setr_ps: 2907; X32: # BB#0: 2908; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2909; X32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 2910; X32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 2911; X32-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 2912; X32-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero 2913; X32-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero 2914; X32-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero 2915; X32-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero 2916; X32-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2917; X32-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2918; X32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 2919; X32-NEXT: vinsertps {{.*#+}} xmm1 = xmm7[0],xmm6[0],xmm7[2,3] 2920; X32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm5[0],xmm1[3] 2921; X32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0] 2922; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2923; X32-NEXT: retl 2924; 2925; X64-LABEL: test_mm256_setr_ps: 2926; X64: # BB#0: 2927; X64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] 2928; X64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3] 2929; X64-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0] 2930; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2931; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2932; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0] 2933; X64-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 2934; X64-NEXT: retq 2935 %res0 = insertelement <8 x float> undef, float %a0, i32 0 2936 %res1 = insertelement <8 x float> %res0, 
float %a1, i32 1 2937 %res2 = insertelement <8 x float> %res1, float %a2, i32 2 2938 %res3 = insertelement <8 x float> %res2, float %a3, i32 3 2939 %res4 = insertelement <8 x float> %res3, float %a4, i32 4 2940 %res5 = insertelement <8 x float> %res4, float %a5, i32 5 2941 %res6 = insertelement <8 x float> %res5, float %a6, i32 6 2942 %res7 = insertelement <8 x float> %res6, float %a7, i32 7 2943 ret <8 x float> %res7 2944} 2945 2946define <4 x double> @test_mm256_setzero_pd() nounwind { 2947; X32-LABEL: test_mm256_setzero_pd: 2948; X32: # BB#0: 2949; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0 2950; X32-NEXT: retl 2951; 2952; X64-LABEL: test_mm256_setzero_pd: 2953; X64: # BB#0: 2954; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0 2955; X64-NEXT: retq 2956 ret <4 x double> zeroinitializer 2957} 2958 2959define <8 x float> @test_mm256_setzero_ps() nounwind { 2960; X32-LABEL: test_mm256_setzero_ps: 2961; X32: # BB#0: 2962; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0 2963; X32-NEXT: retl 2964; 2965; X64-LABEL: test_mm256_setzero_ps: 2966; X64: # BB#0: 2967; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0 2968; X64-NEXT: retq 2969 ret <8 x float> zeroinitializer 2970} 2971 2972define <4 x i64> @test_mm256_setzero_si256() nounwind { 2973; X32-LABEL: test_mm256_setzero_si256: 2974; X32: # BB#0: 2975; X32-NEXT: vxorps %ymm0, %ymm0, %ymm0 2976; X32-NEXT: retl 2977; 2978; X64-LABEL: test_mm256_setzero_si256: 2979; X64: # BB#0: 2980; X64-NEXT: vxorps %ymm0, %ymm0, %ymm0 2981; X64-NEXT: retq 2982 ret <4 x i64> zeroinitializer 2983} 2984 2985define <4 x double> @test_mm256_shuffle_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 2986; X32-LABEL: test_mm256_shuffle_pd: 2987; X32: # BB#0: 2988; X32-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 2989; X32-NEXT: retl 2990; 2991; X64-LABEL: test_mm256_shuffle_pd: 2992; X64: # BB#0: 2993; X64-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 2994; X64-NEXT: retq 2995 %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> 
<i32 0, i32 4, i32 2, i32 6> 2996 ret <4 x double> %res 2997} 2998 2999define <8 x float> @test_mm256_shuffle_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 3000; X32-LABEL: test_mm256_shuffle_ps: 3001; X32: # BB#0: 3002; X32-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 3003; X32-NEXT: retl 3004; 3005; X64-LABEL: test_mm256_shuffle_ps: 3006; X64: # BB#0: 3007; X64-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] 3008; X64-NEXT: retq 3009 %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12> 3010 ret <8 x float> %res 3011} 3012 3013define <4 x double> @test_mm256_sqrt_pd(<4 x double> %a0) nounwind { 3014; X32-LABEL: test_mm256_sqrt_pd: 3015; X32: # BB#0: 3016; X32-NEXT: vsqrtpd %ymm0, %ymm0 3017; X32-NEXT: retl 3018; 3019; X64-LABEL: test_mm256_sqrt_pd: 3020; X64: # BB#0: 3021; X64-NEXT: vsqrtpd %ymm0, %ymm0 3022; X64-NEXT: retq 3023 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) 3024 ret <4 x double> %res 3025} 3026declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone 3027 3028define <8 x float> @test_mm256_sqrt_ps(<8 x float> %a0) nounwind { 3029; X32-LABEL: test_mm256_sqrt_ps: 3030; X32: # BB#0: 3031; X32-NEXT: vsqrtps %ymm0, %ymm0 3032; X32-NEXT: retl 3033; 3034; X64-LABEL: test_mm256_sqrt_ps: 3035; X64: # BB#0: 3036; X64-NEXT: vsqrtps %ymm0, %ymm0 3037; X64-NEXT: retq 3038 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) 3039 ret <8 x float> %res 3040} 3041declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone 3042 3043define void @test_mm256_store_pd(double* %a0, <4 x double> %a1) nounwind { 3044; X32-LABEL: test_mm256_store_pd: 3045; X32: # BB#0: 3046; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3047; X32-NEXT: vmovaps %ymm0, (%eax) 3048; X32-NEXT: vzeroupper 3049; X32-NEXT: retl 3050; 3051; X64-LABEL: test_mm256_store_pd: 3052; X64: # BB#0: 3053; X64-NEXT: vmovaps 
%ymm0, (%rdi) 3054; X64-NEXT: vzeroupper 3055; X64-NEXT: retq 3056 %arg0 = bitcast double* %a0 to <4 x double>* 3057 store <4 x double> %a1, <4 x double>* %arg0, align 32 3058 ret void 3059} 3060 3061define void @test_mm256_store_ps(float* %a0, <8 x float> %a1) nounwind { 3062; X32-LABEL: test_mm256_store_ps: 3063; X32: # BB#0: 3064; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3065; X32-NEXT: vmovaps %ymm0, (%eax) 3066; X32-NEXT: vzeroupper 3067; X32-NEXT: retl 3068; 3069; X64-LABEL: test_mm256_store_ps: 3070; X64: # BB#0: 3071; X64-NEXT: vmovaps %ymm0, (%rdi) 3072; X64-NEXT: vzeroupper 3073; X64-NEXT: retq 3074 %arg0 = bitcast float* %a0 to <8 x float>* 3075 store <8 x float> %a1, <8 x float>* %arg0, align 32 3076 ret void 3077} 3078 3079define void @test_mm256_store_si256(<4 x i64>* %a0, <4 x i64> %a1) nounwind { 3080; X32-LABEL: test_mm256_store_si256: 3081; X32: # BB#0: 3082; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3083; X32-NEXT: vmovaps %ymm0, (%eax) 3084; X32-NEXT: vzeroupper 3085; X32-NEXT: retl 3086; 3087; X64-LABEL: test_mm256_store_si256: 3088; X64: # BB#0: 3089; X64-NEXT: vmovaps %ymm0, (%rdi) 3090; X64-NEXT: vzeroupper 3091; X64-NEXT: retq 3092 store <4 x i64> %a1, <4 x i64>* %a0, align 32 3093 ret void 3094} 3095 3096define void @test_mm256_storeu_pd(double* %a0, <4 x double> %a1) nounwind { 3097; X32-LABEL: test_mm256_storeu_pd: 3098; X32: # BB#0: 3099; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3100; X32-NEXT: vmovups %ymm0, (%eax) 3101; X32-NEXT: vzeroupper 3102; X32-NEXT: retl 3103; 3104; X64-LABEL: test_mm256_storeu_pd: 3105; X64: # BB#0: 3106; X64-NEXT: vmovups %ymm0, (%rdi) 3107; X64-NEXT: vzeroupper 3108; X64-NEXT: retq 3109 %arg0 = bitcast double* %a0 to <4 x double>* 3110 store <4 x double> %a1, <4 x double>* %arg0, align 1 3111 ret void 3112} 3113 3114define void @test_mm256_storeu_ps(float* %a0, <8 x float> %a1) nounwind { 3115; X32-LABEL: test_mm256_storeu_ps: 3116; X32: # BB#0: 3117; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3118; X32-NEXT: vmovups 
%ymm0, (%eax) 3119; X32-NEXT: vzeroupper 3120; X32-NEXT: retl 3121; 3122; X64-LABEL: test_mm256_storeu_ps: 3123; X64: # BB#0: 3124; X64-NEXT: vmovups %ymm0, (%rdi) 3125; X64-NEXT: vzeroupper 3126; X64-NEXT: retq 3127 %arg0 = bitcast float* %a0 to <8 x float>* 3128 store <8 x float> %a1, <8 x float>* %arg0, align 1 3129 ret void 3130} 3131 3132define void @test_mm256_storeu_si256(<4 x i64>* %a0, <4 x i64> %a1) nounwind { 3133; X32-LABEL: test_mm256_storeu_si256: 3134; X32: # BB#0: 3135; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3136; X32-NEXT: vmovups %ymm0, (%eax) 3137; X32-NEXT: vzeroupper 3138; X32-NEXT: retl 3139; 3140; X64-LABEL: test_mm256_storeu_si256: 3141; X64: # BB#0: 3142; X64-NEXT: vmovups %ymm0, (%rdi) 3143; X64-NEXT: vzeroupper 3144; X64-NEXT: retq 3145 store <4 x i64> %a1, <4 x i64>* %a0, align 1 3146 ret void 3147} 3148 3149define void @test_mm256_storeu2_m128(float* %a0, float* %a1, <8 x float> %a2) nounwind { 3150; X32-LABEL: test_mm256_storeu2_m128: 3151; X32: # BB#0: 3152; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3153; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 3154; X32-NEXT: vmovups %xmm0, (%ecx) 3155; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 3156; X32-NEXT: vmovups %xmm0, (%eax) 3157; X32-NEXT: vzeroupper 3158; X32-NEXT: retl 3159; 3160; X64-LABEL: test_mm256_storeu2_m128: 3161; X64: # BB#0: 3162; X64-NEXT: vmovups %xmm0, (%rdi) 3163; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 3164; X64-NEXT: vmovups %xmm0, (%rsi) 3165; X64-NEXT: vzeroupper 3166; X64-NEXT: retq 3167 %arg0 = bitcast float* %a0 to <4 x float>* 3168 %lo = shufflevector <8 x float> %a2, <8 x float> %a2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3169 store <4 x float> %lo, <4 x float>* %arg0, align 1 3170 %arg1 = bitcast float* %a1 to <4 x float>* 3171 %hi = shufflevector <8 x float> %a2, <8 x float> %a2, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3172 store <4 x float> %hi, <4 x float>* %arg1, align 1 3173 ret void 3174} 3175 3176define void @test_mm256_storeu2_m128d(double* %a0, double* %a1, <4 x 
double> %a2) nounwind { 3177; X32-LABEL: test_mm256_storeu2_m128d: 3178; X32: # BB#0: 3179; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3180; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 3181; X32-NEXT: vmovups %xmm0, (%ecx) 3182; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 3183; X32-NEXT: vmovups %xmm0, (%eax) 3184; X32-NEXT: vzeroupper 3185; X32-NEXT: retl 3186; 3187; X64-LABEL: test_mm256_storeu2_m128d: 3188; X64: # BB#0: 3189; X64-NEXT: vmovups %xmm0, (%rdi) 3190; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 3191; X64-NEXT: vmovups %xmm0, (%rsi) 3192; X64-NEXT: vzeroupper 3193; X64-NEXT: retq 3194 %arg0 = bitcast double* %a0 to <2 x double>* 3195 %lo = shufflevector <4 x double> %a2, <4 x double> %a2, <2 x i32> <i32 0, i32 1> 3196 store <2 x double> %lo, <2 x double>* %arg0, align 1 3197 %arg1 = bitcast double* %a1 to <2 x double>* 3198 %hi = shufflevector <4 x double> %a2, <4 x double> %a2, <2 x i32> <i32 2, i32 3> 3199 store <2 x double> %hi, <2 x double>* %arg1, align 1 3200 ret void 3201} 3202 3203define void @test_mm256_storeu2_m128i(<2 x i64>* %a0, <2 x i64>* %a1, <4 x i64> %a2) nounwind { 3204; X32-LABEL: test_mm256_storeu2_m128i: 3205; X32: # BB#0: 3206; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3207; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 3208; X32-NEXT: vmovups %xmm0, (%ecx) 3209; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 3210; X32-NEXT: vmovups %xmm0, (%eax) 3211; X32-NEXT: vzeroupper 3212; X32-NEXT: retl 3213; 3214; X64-LABEL: test_mm256_storeu2_m128i: 3215; X64: # BB#0: 3216; X64-NEXT: vmovups %xmm0, (%rdi) 3217; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 3218; X64-NEXT: vmovups %xmm0, (%rsi) 3219; X64-NEXT: vzeroupper 3220; X64-NEXT: retq 3221 %arg0 = bitcast <2 x i64>* %a0 to <2 x i64>* 3222 %lo = shufflevector <4 x i64> %a2, <4 x i64> %a2, <2 x i32> <i32 0, i32 1> 3223 store <2 x i64> %lo, <2 x i64>* %arg0, align 1 3224 %arg1 = bitcast <2 x i64>* %a1 to <2 x i64>* 3225 %hi = shufflevector <4 x i64> %a2, <4 x i64> %a2, <2 x i32> <i32 2, i32 3> 3226 store <2 x i64> %hi, <2 x 
i64>* %arg1, align 1 3227 ret void 3228} 3229 3230define void @test_mm256_stream_pd(double *%a0, <4 x double> %a1) nounwind { 3231; X32-LABEL: test_mm256_stream_pd: 3232; X32: # BB#0: 3233; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3234; X32-NEXT: vmovntps %ymm0, (%eax) 3235; X32-NEXT: vzeroupper 3236; X32-NEXT: retl 3237; 3238; X64-LABEL: test_mm256_stream_pd: 3239; X64: # BB#0: 3240; X64-NEXT: vmovntps %ymm0, (%rdi) 3241; X64-NEXT: vzeroupper 3242; X64-NEXT: retq 3243 %arg0 = bitcast double* %a0 to <4 x double>* 3244 store <4 x double> %a1, <4 x double>* %arg0, align 32, !nontemporal !0 3245 ret void 3246} 3247 3248define void @test_mm256_stream_ps(float *%a0, <8 x float> %a1) nounwind { 3249; X32-LABEL: test_mm256_stream_ps: 3250; X32: # BB#0: 3251; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3252; X32-NEXT: vmovntps %ymm0, (%eax) 3253; X32-NEXT: vzeroupper 3254; X32-NEXT: retl 3255; 3256; X64-LABEL: test_mm256_stream_ps: 3257; X64: # BB#0: 3258; X64-NEXT: vmovntps %ymm0, (%rdi) 3259; X64-NEXT: vzeroupper 3260; X64-NEXT: retq 3261 %arg0 = bitcast float* %a0 to <8 x float>* 3262 store <8 x float> %a1, <8 x float>* %arg0, align 32, !nontemporal !0 3263 ret void 3264} 3265 3266define void @test_mm256_stream_si256(<4 x i64> *%a0, <4 x i64> %a1) nounwind { 3267; X32-LABEL: test_mm256_stream_si256: 3268; X32: # BB#0: 3269; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3270; X32-NEXT: vmovntps %ymm0, (%eax) 3271; X32-NEXT: vzeroupper 3272; X32-NEXT: retl 3273; 3274; X64-LABEL: test_mm256_stream_si256: 3275; X64: # BB#0: 3276; X64-NEXT: vmovntps %ymm0, (%rdi) 3277; X64-NEXT: vzeroupper 3278; X64-NEXT: retq 3279 store <4 x i64> %a1, <4 x i64>* %a0, align 32, !nontemporal !0 3280 ret void 3281} 3282 3283define <4 x double> @test_mm256_sub_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 3284; X32-LABEL: test_mm256_sub_pd: 3285; X32: # BB#0: 3286; X32-NEXT: vsubpd %ymm1, %ymm0, %ymm0 3287; X32-NEXT: retl 3288; 3289; X64-LABEL: test_mm256_sub_pd: 3290; X64: # BB#0: 3291; X64-NEXT: vsubpd 
%ymm1, %ymm0, %ymm0 3292; X64-NEXT: retq 3293 %res = fsub <4 x double> %a0, %a1 3294 ret <4 x double> %res 3295} 3296 3297define <8 x float> @test_mm256_sub_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 3298; X32-LABEL: test_mm256_sub_ps: 3299; X32: # BB#0: 3300; X32-NEXT: vsubps %ymm1, %ymm0, %ymm0 3301; X32-NEXT: retl 3302; 3303; X64-LABEL: test_mm256_sub_ps: 3304; X64: # BB#0: 3305; X64-NEXT: vsubps %ymm1, %ymm0, %ymm0 3306; X64-NEXT: retq 3307 %res = fsub <8 x float> %a0, %a1 3308 ret <8 x float> %res 3309} 3310 3311define i32 @test_mm_testc_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 3312; X32-LABEL: test_mm_testc_pd: 3313; X32: # BB#0: 3314; X32-NEXT: vtestpd %xmm1, %xmm0 3315; X32-NEXT: sbbl %eax, %eax 3316; X32-NEXT: andl $1, %eax 3317; X32-NEXT: retl 3318; 3319; X64-LABEL: test_mm_testc_pd: 3320; X64: # BB#0: 3321; X64-NEXT: vtestpd %xmm1, %xmm0 3322; X64-NEXT: sbbl %eax, %eax 3323; X64-NEXT: andl $1, %eax 3324; X64-NEXT: retq 3325 %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) 3326 ret i32 %res 3327} 3328declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone 3329 3330define i32 @test_mm256_testc_pd(<4 x double> %a0, <4 x double> %a1) nounwind { 3331; X32-LABEL: test_mm256_testc_pd: 3332; X32: # BB#0: 3333; X32-NEXT: vtestpd %ymm1, %ymm0 3334; X32-NEXT: sbbl %eax, %eax 3335; X32-NEXT: andl $1, %eax 3336; X32-NEXT: vzeroupper 3337; X32-NEXT: retl 3338; 3339; X64-LABEL: test_mm256_testc_pd: 3340; X64: # BB#0: 3341; X64-NEXT: vtestpd %ymm1, %ymm0 3342; X64-NEXT: sbbl %eax, %eax 3343; X64-NEXT: andl $1, %eax 3344; X64-NEXT: vzeroupper 3345; X64-NEXT: retq 3346 %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) 3347 ret i32 %res 3348} 3349declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone 3350 3351define i32 @test_mm_testc_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3352; X32-LABEL: test_mm_testc_ps: 3353; X32: # BB#0: 3354; 
X32-NEXT: vtestps %xmm1, %xmm0 3355; X32-NEXT: sbbl %eax, %eax 3356; X32-NEXT: andl $1, %eax 3357; X32-NEXT: retl 3358; 3359; X64-LABEL: test_mm_testc_ps: 3360; X64: # BB#0: 3361; X64-NEXT: vtestps %xmm1, %xmm0 3362; X64-NEXT: sbbl %eax, %eax 3363; X64-NEXT: andl $1, %eax 3364; X64-NEXT: retq 3365 %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) 3366 ret i32 %res 3367} 3368declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone 3369 3370define i32 @test_mm256_testc_ps(<8 x float> %a0, <8 x float> %a1) nounwind { 3371; X32-LABEL: test_mm256_testc_ps: 3372; X32: # BB#0: 3373; X32-NEXT: vtestps %ymm1, %ymm0 3374; X32-NEXT: sbbl %eax, %eax 3375; X32-NEXT: andl $1, %eax 3376; X32-NEXT: vzeroupper 3377; X32-NEXT: retl 3378; 3379; X64-LABEL: test_mm256_testc_ps: 3380; X64: # BB#0: 3381; X64-NEXT: vtestps %ymm1, %ymm0 3382; X64-NEXT: sbbl %eax, %eax 3383; X64-NEXT: andl $1, %eax 3384; X64-NEXT: vzeroupper 3385; X64-NEXT: retq 3386 %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) 3387 ret i32 %res 3388} 3389declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone 3390 3391define i32 @test_mm256_testc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind { 3392; X32-LABEL: test_mm256_testc_si256: 3393; X32: # BB#0: 3394; X32-NEXT: vptest %ymm1, %ymm0 3395; X32-NEXT: sbbl %eax, %eax 3396; X32-NEXT: andl $1, %eax 3397; X32-NEXT: vzeroupper 3398; X32-NEXT: retl 3399; 3400; X64-LABEL: test_mm256_testc_si256: 3401; X64: # BB#0: 3402; X64-NEXT: vptest %ymm1, %ymm0 3403; X64-NEXT: sbbl %eax, %eax 3404; X64-NEXT: andl $1, %eax 3405; X64-NEXT: vzeroupper 3406; X64-NEXT: retq 3407 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) 3408 ret i32 %res 3409} 3410declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone 3411 3412define i32 @test_mm_testnzc_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 3413; X32-LABEL: test_mm_testnzc_pd: 3414; X32: # 
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vtestpd %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_testnzc_pd:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vtestpd %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm256_testnzc_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_testnzc_pd:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vtestpd %ymm1, %ymm0
; X32-NEXT:    seta %al
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_testnzc_pd:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vtestpd %ymm1, %ymm0
; X64-NEXT:    seta %al
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @test_mm_testnzc_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_testnzc_ps:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vtestps %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_testnzc_ps:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vtestps %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm256_testnzc_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_testnzc_ps:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vtestps %ymm1, %ymm0
; X32-NEXT:    seta %al
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_testnzc_ps:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vtestps %ymm1, %ymm0
; X64-NEXT:    seta %al
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @test_mm256_testnzc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_testnzc_si256:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vptest %ymm1, %ymm0
; X32-NEXT:    seta %al
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_testnzc_si256:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vptest %ymm1, %ymm0
; X64-NEXT:    seta %al
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone

define i32 @test_mm_testz_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_testz_pd:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vtestpd %xmm1, %xmm0
; X32-NEXT:    sete %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_testz_pd:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vtestpd %xmm1, %xmm0
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm256_testz_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_testz_pd:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vtestpd %ymm1, %ymm0
; X32-NEXT:    sete %al
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_testz_pd:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vtestpd %ymm1, %ymm0
; X64-NEXT:    sete %al
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @test_mm_testz_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_testz_ps:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vtestps %xmm1, %xmm0
; X32-NEXT:    sete %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_testz_ps:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vtestps %xmm1, %xmm0
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm256_testz_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_testz_ps:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vtestps %ymm1, %ymm0
; X32-NEXT:    sete %al
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_testz_ps:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vtestps %ymm1, %ymm0
; X64-NEXT:    sete %al
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @test_mm256_testz_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_testz_si256:
; X32: # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    vptest %ymm1, %ymm0
; X32-NEXT:    sete %al
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_testz_si256:
; X64: # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    vptest %ymm1, %ymm0
; X64-NEXT:    sete %al
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone

define <2 x double> @test_mm_undefined_pd() nounwind {
; X32-LABEL: test_mm_undefined_pd:
; X32: # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_undefined_pd:
; X64: # BB#0:
; X64-NEXT:    retq
  ret <2 x double> undef
}

define <4 x double> @test_mm256_undefined_pd() nounwind {
; X32-LABEL: test_mm256_undefined_pd:
; X32: # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_undefined_pd:
; X64: # BB#0:
; X64-NEXT:    retq
  ret <4 x double> undef
}

define <8 x float> @test_mm256_undefined_ps() nounwind {
; X32-LABEL: test_mm256_undefined_ps:
; X32: # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_undefined_ps:
; X64: # BB#0:
; X64-NEXT:    retq
  ret <8 x float> undef
}

define <4 x i64> @test_mm256_undefined_si256() nounwind {
; X32-LABEL: test_mm256_undefined_si256:
; X32: # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_undefined_si256:
; X64: # BB#0:
; X64-NEXT:    retq
  ret <4 x i64> undef
}

define <4 x double> @test_mm256_unpackhi_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_unpackhi_pd:
; X32: # BB#0:
; X32-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_unpackhi_pd:
; X64: # BB#0:
; X64-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    retq
  %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_unpackhi_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_unpackhi_ps:
; X32: # BB#0:
; X32-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_unpackhi_ps:
; X64: # BB#0:
; X64-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    retq
  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_unpacklo_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_unpacklo_pd:
; X32: # BB#0:
; X32-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_unpacklo_pd:
; X64: # BB#0:
; X64-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    retq
  %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %res
}

define <8 x float> @test_mm256_unpacklo_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_unpacklo_ps:
; X32: # BB#0:
; X32-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_unpacklo_ps:
; X64: # BB#0:
; X64-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    retq
  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ret <8 x float> %res
}

define <4 x double> @test_mm256_xor_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_xor_pd:
; X32: # BB#0:
; X32-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_xor_pd:
; X64: # BB#0:
; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = bitcast <4 x double> %a0 to <4 x i64>
  %2 = bitcast <4 x double> %a1 to <4 x i64>
  %res = xor <4 x i64> %1, %2
  %bc = bitcast <4 x i64> %res to <4 x double>
  ret <4 x double> %bc
}

define <8 x float> @test_mm256_xor_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_xor_ps:
; X32: # BB#0:
; X32-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_xor_ps:
; X64: # BB#0:
; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = bitcast <8 x float> %a0 to <8 x i32>
  %2 = bitcast <8 x float> %a1 to <8 x i32>
  %res = xor <8 x i32> %1, %2
  %bc = bitcast <8 x i32> %res to <8 x float>
  ret <8 x float> %bc
}

define void @test_mm256_zeroall() nounwind {
; X32-LABEL: test_mm256_zeroall:
; X32: # BB#0:
; X32-NEXT:    vzeroall
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_zeroall:
; X64: # BB#0:
; X64-NEXT:    vzeroall
; X64-NEXT:    retq
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind readnone

define void @test_mm256_zeroupper() nounwind {
; X32-LABEL: test_mm256_zeroupper:
; X32: # BB#0:
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_zeroupper:
; X64: # BB#0:
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind readnone

!0 = !{i32 1}