; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX512VL,X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX512VL,X64-AVX512VL

; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.

define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_sqrt_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_sqrt_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone

define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone

define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
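
; Note: for a 128-bit insert into a 256-bit vector only bit 0 of the index is
; meaningful, and it shows up as the trailing immediate byte (0x01) of the
; vinsertf128 encodings above. A rough sketch of the masking checked further
; below: an index of i8 2 is truncated to 2 & 1 == 0, i.e. an insert into
; lane 0, which is just a blend of the two sources.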

define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_si_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_si_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
  ret <8 x i32> %res
}

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
; not a vinsertf128 $1.
define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; CHECK-NEXT:    vblendps $240, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xf0]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone

; We don't check any vextractf128 variant with immediate 0 because that's just a move.

define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_pd_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_pd_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone

define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_ps_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_ps_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone

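; Note: vzeroupper is emitted before each return above because these functions
; return a 128-bit value after writing a ymm register. For the masked
; immediate below, the sketch is the same as in the insert case: i8 2 & 1 == 0,
; and extracting lane 0 is just the low xmm of the source (the "kill" copy),
; so no vextractf128 of any kind should survive.
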
define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
; AVX-LABEL: test_x86_avx_vextractf128_si_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vextractf128_si_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vextractf128 $0 which should be optimized away, so just check that it's
; not a vextractf128 of any kind.
define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
  ret <2 x double> %res
}

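; Note: vbroadcastf128 below performs a single 16-byte load and repeats it in
; both 128-bit halves of the destination, which is what the autogenerated
; shuffle decode "ymm0 = mem[0,1,0,1]" is expressing.
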
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_blend_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $192, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xc0]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone

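; A sketch of the immediate rewrite above: the pd blend mask 7 (0b0111) takes
; elements 0-2 from %a1 and element 3 from %a0. With the operands commuted the
; mask inverts to 0b1000, and lowering to a float-domain vblendps doubles each
; pd bit into two ps bits: 0b11000000 == 192.
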
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_blend_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone


define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone

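; Note: the legacy psll.dq/psrl.dq intrinsics below take their shift count in
; bits, while the vpslldq/vpsrldq instructions shift whole bytes, so the i32 8
; (bits) in the IR becomes a $1 (byte) immediate in the expected assembly.
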
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; AVX-LABEL: test_x86_sse2_psll_dq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX-NEXT:    # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_psll_dq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX512VL-NEXT:    # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; AVX-LABEL: test_x86_sse2_psrl_dq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX-NEXT:    # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_psrl_dq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX512VL-NEXT:    # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone

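; A sketch of the 128-bit blendpd rewrite below: mask i8 2 (0b10) takes element
; 0 from %a0 and element 1 from %a1. Commuting the operands inverts the mask to
; 0b01, and widening each pd bit to two ps bits for vblendps gives 0b0011 == 3.
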
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_blendpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $3, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
; CHECK-NEXT:    # xmm0 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone


define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_blendps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone


define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendw $7, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxbw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxbw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxdq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxdq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxwd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxwd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovsxwq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovsxwq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone

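; Note: the pmovzx tests below carry autogenerated shuffle decodes such as
; "xmm0 = xmm0[0],zero,..." because a zero extension is expressible as a
; shuffle with zero sentinels; the pmovsx tests above have no such decode
; since sign bits cannot be described by a shuffle mask.
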
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxbw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxbw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxdq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxdq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxwd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxwd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; AVX-LABEL: test_x86_sse41_pmovzxwq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse41_pmovzxwq:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone

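; Note: llvm.x86.sse2.cvtdq2pd below only converts the two low i32 elements of
; its <4 x i32> argument, so a 128-bit vcvtdq2pd is expected; the 256-bit
; variant widens all four elements from xmm into ymm.
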
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse2_cvtdq2pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # encoding: [0xc5,0xfe,0xe6,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; AVX-LABEL: test_x86_sse2_cvtps2pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
; X86-AVX-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX-NEXT:    vmovdqu %xmm0, (%eax) # encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX512VL-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX-NEXT:    vmovdqu %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX512VL-NEXT:    vmovdqu %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind

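; Note: the add-one above is materialized as vpcmpeqd (a register compared
; with itself yields all-ones, i.e. -1) followed by vpsubb, since x - (-1) is
; x + 1; keeping every op in the integer domain is what makes the store come
; out as vmovdqu rather than vmovups.
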
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; fadd operation forces the execution domain.
; X86-AVX-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX-NEXT:    vmovhpd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT:    # xmm1 = xmm1[0],mem[0]
; X86-AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX-NEXT:    vmovupd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX512VL-NEXT:    vmovhpd {{\.LCPI.*}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    # xmm1 = xmm1[0],mem[0]
; X86-AVX512VL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX512VL-NEXT:    vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    # xmm1 = xmm1[0],mem[0]
; X64-AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX-NEXT:    vmovupd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX512VL-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    # xmm1 = xmm1[0],mem[0]
; X64-AVX512VL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind

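; Note: because element 0 of the constant above is +0.0, the expected code
; builds it with vxorpd (zeroing) plus vmovhpd, loading only the nonzero high
; half from the constant pool instead of a full 16-byte constant.
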
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; X86-AVX-LABEL: test_x86_sse_storeu_ps:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_sse_storeu_ps:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse_storeu_ps:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_sse_storeu_ps:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind

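; Note: in the AVX-only runs below there is no 256-bit integer subtract, so the
; add is split into two 128-bit vpsubb halves (via vextractf128) and two xmm
; stores; the AVX512VL runs can use a single ymm vpsubb and one ymm store.
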
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions
  ; add operation forces the execution domain.
; X86-AVX-LABEL: test_x86_avx_storeu_dq_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX-NEXT:    vmovdqu %xmm0, 16(%eax) # encoding: [0xc5,0xfa,0x7f,0x40,0x10]
; X86-AVX-NEXT:    vmovdqu %xmm2, (%eax) # encoding: [0xc5,0xfa,0x7f,0x10]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; X86-AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
; X86-AVX512VL-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_storeu_dq_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX-NEXT:    vmovdqu %xmm0, 16(%rdi) # encoding: [0xc5,0xfa,0x7f,0x47,0x10]
; X64-AVX-NEXT:    vmovdqu %xmm2, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x17]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; X64-AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
; X64-AVX512VL-NEXT:    vmovdqu %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind


define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
  ; fadd operation forces the execution domain.
; X86-AVX-LABEL: test_x86_avx_storeu_pd_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX-NEXT:    vmovupd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x11,0x00]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX512VL-NEXT:    vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_storeu_pd_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX-NEXT:    vmovupd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x11,0x07]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX512VL-NEXT:    vmovupd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind

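; Note: the fadd of +0.0 above survives to vaddpd (presumably because folding
; x + 0.0 away is not valid for x == -0.0 without fast-math), which is exactly
; what keeps the computation and store in the FP domain as vmovupd.
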
define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
; X86-AVX-LABEL: test_x86_avx_storeu_ps_256:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_storeu_ps_256:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind

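; A sketch of the vpermil immediate decoding checked below, for the 256-bit pd
; case with imm 7 (0b0111): each bit selects the high or low double within its
; own 128-bit lane, one bit per element, so bits (1,1,1,0) for elements 0-3
; give the decode ymm0[1,1,3,2].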

define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
; AVX-LABEL: test_x86_avx_vpermil_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; AVX-NEXT:    # xmm0 = xmm0[1,0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_pd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; AVX512VL-NEXT:    # xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_vpermil_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
; AVX-NEXT:    # ymm0 = ymm0[1,1,3,2]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
; AVX512VL-NEXT:    # ymm0 = ymm0[1,1,3,2]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
; AVX-LABEL: test_x86_avx_vpermil_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps $7, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
; AVX-NEXT:    # xmm0 = xmm0[3,1,0,0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_ps:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilps $7, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
; AVX512VL-NEXT:    # xmm0 = xmm0[3,1,0,0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_vpermil_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
; AVX-NEXT:    # ymm0 = ymm0[3,1,0,0,7,5,4,4]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermil_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
; AVX512VL-NEXT:    # ymm0 = ymm0[3,1,0,0,7,5,4,4]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
; AVX-LABEL: test_x86_avx_vperm2f128_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vperm2f128_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
; AVX-LABEL: test_x86_avx_vperm2f128_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vperm2f128_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vperm2f128_si_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vperm2f128_si_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vperm2i128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x46,0xc0,0x21]
; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone

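; A sketch of the vperm2f128 immediate above: in the intrinsic call, imm 3
; selects lane 1 of %a1 for the low half and lane 0 of %a0 for the high half.
; With the operands commuted by the compiler, the lane selectors become 1 and
; 2, i.e. $33 (0x21); the integer si variant is expected as vperm2i128 on the
; AVX512VL runs.
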
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5b,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone