; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX512VL,X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512VL,X64-AVX512VL

; The four llc invocations above cover a 32-bit (i686) and a 64-bit (x86_64)
; triple, each built once with only +avx and once with the AVX-512 F/BW/DQ/VL
; feature set. -show-mc-encoding makes llc print the instruction bytes, which
; is why every expected instruction below carries an "encoding" annotation.
; The assertion comments inside each function are machine-generated; after
; changing any IR, regenerate them with the script named on the first line
; rather than editing them by hand.

; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.

; Calls llvm.x86.avx.sqrt.pd.256 on the <4 x double> argument and returns the
; result. Both feature sets expect a single vsqrtpd on %ymm0 with the same
; encoding bytes; the AVX-512 run additionally expects llc's
; "EVEX TO VEX Compression" annotation on that instruction.
define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_sqrt_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone

; Single-precision counterpart of the test above: llvm.x86.avx.sqrt.ps.256 on
; an <8 x float> argument, expected to become one vsqrtps on %ymm0.
define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_sqrt_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone

; Calls llvm.x86.avx.vinsertf128.pd.256 with %a0 (256-bit), %a1 (128-bit) and
; immediate 1. Expected output is a single "vinsertf128 $1" writing %ymm0.
define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_pd_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone

; Same shape as the pd test above, using the <8 x float> / <4 x float> form of
; the intrinsic; the expected instruction and encoding bytes are identical.
define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_ps_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone

; Integer-vector form (<8 x i32> / <4 x i32>) of the insert with immediate 1.
; Its intrinsic declaration appears after the _2 test further down, which
; calls the same intrinsic.
define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vinsertf128_si_256_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vinsertf128_si_256_1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
  ret <8 x i32> %res
}

; Verify that high bits of the immediate are masked off. This should be the equivalent
; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
; not a vinsertf128 $1.
; The immediate passed here is 2. All four configurations share one expected
; sequence: a "vblendps $240" whose shuffle comment reads
; ymm1[0,1,2,3],ymm0[4,5,6,7], preceded by a kill note widening $xmm1 to $ymm1.
define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; CHECK-NEXT:    vblendps $240, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xf0]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone

; We don't check any vextractf128 variant with immediate 0 because that's just a move.
99 100define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) { 101; AVX-LABEL: test_x86_avx_vextractf128_pd_256_1: 102; AVX: # %bb.0: 103; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 104; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 105; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 106; 107; AVX512VL-LABEL: test_x86_avx_vextractf128_pd_256_1: 108; AVX512VL: # %bb.0: 109; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 110; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 111; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 112 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1) 113 ret <2 x double> %res 114} 115declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone 116 117define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) { 118; AVX-LABEL: test_x86_avx_vextractf128_ps_256_1: 119; AVX: # %bb.0: 120; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 121; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 122; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 123; 124; AVX512VL-LABEL: test_x86_avx_vextractf128_ps_256_1: 125; AVX512VL: # %bb.0: 126; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 127; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 128; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 129 %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1) 130 ret <4 x float> %res 131} 132declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone 133 134define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) { 135; AVX-LABEL: test_x86_avx_vextractf128_si_256_1: 136; AVX: # %bb.0: 137; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 138; AVX-NEXT: 
vzeroupper # encoding: [0xc5,0xf8,0x77] 139; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 140; 141; AVX512VL-LABEL: test_x86_avx_vextractf128_si_256_1: 142; AVX512VL: # %bb.0: 143; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] 144; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 145; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 146 %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1) 147 ret <4 x i32> %res 148} 149declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone 150 151; Verify that high bits of the immediate are masked off. This should be the equivalent 152; of a vextractf128 $0 which should be optimized away, so just check that it's 153; not a vextractf128 of any kind. 154define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) { 155; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2: 156; CHECK: # %bb.0: 157; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 158; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 159; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 160 %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2) 161 ret <2 x double> %res 162} 163 164 165define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) { 166; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256: 167; X86-AVX: # %bb.0: 168; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 169; X86-AVX-NEXT: vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00] 170; X86-AVX-NEXT: # ymm0 = mem[0,1,0,1] 171; X86-AVX-NEXT: retl # encoding: [0xc3] 172; 173; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256: 174; X86-AVX512VL: # %bb.0: 175; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 176; X86-AVX512VL-NEXT: vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00] 177; X86-AVX512VL-NEXT: # ymm0 = 
mem[0,1,0,1] 178; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 179; 180; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256: 181; X64-AVX: # %bb.0: 182; X64-AVX-NEXT: vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07] 183; X64-AVX-NEXT: # ymm0 = mem[0,1,0,1] 184; X64-AVX-NEXT: retq # encoding: [0xc3] 185; 186; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256: 187; X64-AVX512VL: # %bb.0: 188; X64-AVX512VL-NEXT: vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07] 189; X64-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1] 190; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 191 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] 192 ret <4 x double> %res 193} 194declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly 195 196 197define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) { 198; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256: 199; X86-AVX: # %bb.0: 200; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 201; X86-AVX-NEXT: vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00] 202; X86-AVX-NEXT: # ymm0 = mem[0,1,0,1] 203; X86-AVX-NEXT: retl # encoding: [0xc3] 204; 205; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256: 206; X86-AVX512VL: # %bb.0: 207; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 208; X86-AVX512VL-NEXT: vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00] 209; X86-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1] 210; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 211; 212; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256: 213; X64-AVX: # %bb.0: 214; X64-AVX-NEXT: vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07] 215; X64-AVX-NEXT: # ymm0 = mem[0,1,0,1] 216; X64-AVX-NEXT: retq # encoding: [0xc3] 217; 218; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256: 219; X64-AVX512VL: # %bb.0: 220; 
X64-AVX512VL-NEXT: vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07] 221; X64-AVX512VL-NEXT: # ymm0 = mem[0,1,0,1] 222; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 223 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] 224 ret <8 x float> %res 225} 226declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly 227 228 229define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) { 230; CHECK-LABEL: test_x86_avx_blend_pd_256: 231; CHECK: # %bb.0: 232; CHECK-NEXT: vblendps $192, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xc0] 233; CHECK-NEXT: # ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] 234; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 235 %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1] 236 ret <4 x double> %res 237} 238declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone 239 240 241define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) { 242; CHECK-LABEL: test_x86_avx_blend_ps_256: 243; CHECK: # %bb.0: 244; CHECK-NEXT: vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07] 245; CHECK-NEXT: # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7] 246; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 247 %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] 248 ret <8 x float> %res 249} 250declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone 251 252 253define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) { 254; CHECK-LABEL: test_x86_avx_dp_ps_256: 255; CHECK: # %bb.0: 256; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07] 257; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 258 %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> 
%a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1] 259 ret <8 x float> %res 260} 261declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone 262 263 264define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { 265; AVX-LABEL: test_x86_sse2_psll_dq: 266; AVX: # %bb.0: 267; AVX-NEXT: vpslldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x01] 268; AVX-NEXT: # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 269; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 270; 271; AVX512VL-LABEL: test_x86_sse2_psll_dq: 272; AVX512VL: # %bb.0: 273; AVX512VL-NEXT: vpslldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01] 274; AVX512VL-NEXT: # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 275; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 276 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1] 277 ret <2 x i64> %res 278} 279declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone 280 281 282define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { 283; AVX-LABEL: test_x86_sse2_psrl_dq: 284; AVX: # %bb.0: 285; AVX-NEXT: vpsrldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x01] 286; AVX-NEXT: # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 287; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 288; 289; AVX512VL-LABEL: test_x86_sse2_psrl_dq: 290; AVX512VL: # %bb.0: 291; AVX512VL-NEXT: vpsrldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01] 292; AVX512VL-NEXT: # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 293; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 294 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1] 295 ret <2 x i64> %res 296} 297declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone 298 299 300define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { 301; CHECK-LABEL: test_x86_sse41_blendpd: 302; CHECK: # 
%bb.0: 303; CHECK-NEXT: vblendps $3, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03] 304; CHECK-NEXT: # xmm0 = xmm0[0,1],xmm1[2,3] 305; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 306 %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1] 307 ret <2 x double> %res 308} 309declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone 310 311 312define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) { 313; CHECK-LABEL: test_x86_sse41_blendps: 314; CHECK: # %bb.0: 315; CHECK-NEXT: vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08] 316; CHECK-NEXT: # xmm0 = xmm1[0,1,2],xmm0[3] 317; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 318 %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 319 ret <4 x float> %res 320} 321declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone 322 323 324define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { 325; CHECK-LABEL: test_x86_sse41_pblendw: 326; CHECK: # %bb.0: 327; CHECK-NEXT: vpblendw $7, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07] 328; CHECK-NEXT: # xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7] 329; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 330 %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1] 331 ret <8 x i16> %res 332} 333declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone 334 335 336define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { 337; AVX-LABEL: test_x86_sse41_pmovsxbd: 338; AVX: # %bb.0: 339; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x21,0xc0] 340; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 341; 342; AVX512VL-LABEL: test_x86_sse41_pmovsxbd: 343; AVX512VL: # %bb.0: 344; AVX512VL-NEXT: vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0] 345; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 346 %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 347 ret <4 x i32> %res 348} 349declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone 350 351 352define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { 353; AVX-LABEL: test_x86_sse41_pmovsxbq: 354; AVX: # %bb.0: 355; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x22,0xc0] 356; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 357; 358; AVX512VL-LABEL: test_x86_sse41_pmovsxbq: 359; AVX512VL: # %bb.0: 360; AVX512VL-NEXT: vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0] 361; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 362 %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 363 ret <2 x i64> %res 364} 365declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone 366 367 368define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { 369; AVX-LABEL: test_x86_sse41_pmovsxbw: 370; AVX: # %bb.0: 371; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x20,0xc0] 372; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 373; 374; AVX512VL-LABEL: test_x86_sse41_pmovsxbw: 375; AVX512VL: # %bb.0: 376; AVX512VL-NEXT: vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0] 377; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 378 %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 379 ret <8 x i16> %res 380} 381declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone 382 383 384define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { 385; AVX-LABEL: test_x86_sse41_pmovsxdq: 386; AVX: # %bb.0: 387; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x25,0xc0] 388; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 389; 390; AVX512VL-LABEL: test_x86_sse41_pmovsxdq: 391; AVX512VL: # %bb.0: 392; 
AVX512VL-NEXT: vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0] 393; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 394 %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 395 ret <2 x i64> %res 396} 397declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone 398 399 400define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { 401; AVX-LABEL: test_x86_sse41_pmovsxwd: 402; AVX: # %bb.0: 403; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x23,0xc0] 404; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 405; 406; AVX512VL-LABEL: test_x86_sse41_pmovsxwd: 407; AVX512VL: # %bb.0: 408; AVX512VL-NEXT: vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0] 409; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 410 %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 411 ret <4 x i32> %res 412} 413declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone 414 415 416define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { 417; AVX-LABEL: test_x86_sse41_pmovsxwq: 418; AVX: # %bb.0: 419; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x24,0xc0] 420; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 421; 422; AVX512VL-LABEL: test_x86_sse41_pmovsxwq: 423; AVX512VL: # %bb.0: 424; AVX512VL-NEXT: vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0] 425; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 426 %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 427 ret <2 x i64> %res 428} 429declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone 430 431 432define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { 433; AVX-LABEL: test_x86_sse41_pmovzxbd: 434; AVX: # %bb.0: 435; AVX-NEXT: vpmovzxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x31,0xc0] 436; AVX-NEXT: # xmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 437; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 438; 439; AVX512VL-LABEL: test_x86_sse41_pmovzxbd: 440; AVX512VL: # %bb.0: 441; AVX512VL-NEXT: vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0] 442; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 443; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 444 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 445 ret <4 x i32> %res 446} 447declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone 448 449 450define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { 451; AVX-LABEL: test_x86_sse41_pmovzxbq: 452; AVX: # %bb.0: 453; AVX-NEXT: vpmovzxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x32,0xc0] 454; AVX-NEXT: # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 455; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 456; 457; AVX512VL-LABEL: test_x86_sse41_pmovzxbq: 458; AVX512VL: # %bb.0: 459; AVX512VL-NEXT: vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0] 460; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 461; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 462 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 463 ret <2 x i64> %res 464} 465declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone 466 467 468define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { 469; AVX-LABEL: test_x86_sse41_pmovzxbw: 470; AVX: # %bb.0: 471; AVX-NEXT: vpmovzxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x30,0xc0] 472; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 473; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 474; 475; AVX512VL-LABEL: 
test_x86_sse41_pmovzxbw: 476; AVX512VL: # %bb.0: 477; AVX512VL-NEXT: vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0] 478; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 479; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 480 %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] 481 ret <8 x i16> %res 482} 483declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone 484 485 486define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { 487; AVX-LABEL: test_x86_sse41_pmovzxdq: 488; AVX: # %bb.0: 489; AVX-NEXT: vpmovzxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x35,0xc0] 490; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero 491; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 492; 493; AVX512VL-LABEL: test_x86_sse41_pmovzxdq: 494; AVX512VL: # %bb.0: 495; AVX512VL-NEXT: vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0] 496; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero 497; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 498 %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] 499 ret <2 x i64> %res 500} 501declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone 502 503 504define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { 505; AVX-LABEL: test_x86_sse41_pmovzxwd: 506; AVX: # %bb.0: 507; AVX-NEXT: vpmovzxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x33,0xc0] 508; AVX-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 509; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 510; 511; AVX512VL-LABEL: test_x86_sse41_pmovzxwd: 512; AVX512VL: # %bb.0: 513; AVX512VL-NEXT: vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0] 514; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 515; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 516 %res = call <4 x i32> 
@llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] 517 ret <4 x i32> %res 518} 519declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone 520 521 522define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { 523; AVX-LABEL: test_x86_sse41_pmovzxwq: 524; AVX: # %bb.0: 525; AVX-NEXT: vpmovzxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x34,0xc0] 526; AVX-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 527; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 528; 529; AVX512VL-LABEL: test_x86_sse41_pmovzxwq: 530; AVX512VL: # %bb.0: 531; AVX512VL-NEXT: vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0] 532; AVX512VL-NEXT: # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 533; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 534 %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] 535 ret <2 x i64> %res 536} 537declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone 538 539 540define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { 541; AVX-LABEL: test_x86_sse2_cvtdq2pd: 542; AVX: # %bb.0: 543; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0] 544; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 545; 546; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd: 547; AVX512VL: # %bb.0: 548; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0] 549; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 550 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1] 551 ret <2 x double> %res 552} 553declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone 554 555 556define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) { 557; AVX-LABEL: test_x86_avx_cvtdq2_pd_256: 558; AVX: # %bb.0: 559; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 # encoding: [0xc5,0xfe,0xe6,0xc0] 560; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 561; 562; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256: 563; AVX512VL: 
# %bb.0: 564; AVX512VL-NEXT: vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0] 565; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 566 %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1] 567 ret <4 x double> %res 568} 569declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone 570 571 572define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { 573; AVX-LABEL: test_x86_sse2_cvtps2pd: 574; AVX: # %bb.0: 575; AVX-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0] 576; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 577; 578; AVX512VL-LABEL: test_x86_sse2_cvtps2pd: 579; AVX512VL: # %bb.0: 580; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0] 581; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 582 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1] 583 ret <2 x double> %res 584} 585declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone 586 587 588define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) { 589; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256: 590; AVX: # %bb.0: 591; AVX-NEXT: vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0] 592; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 593; 594; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256: 595; AVX512VL: # %bb.0: 596; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0] 597; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 598 %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1] 599 ret <4 x double> %res 600} 601declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone 602 603 604define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { 605 ; add operation forces the execution domain. 
606; X86-AVX-LABEL: test_x86_sse2_storeu_dq: 607; X86-AVX: # %bb.0: 608; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 609; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 610; X86-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1] 611; X86-AVX-NEXT: vmovdqu %xmm0, (%eax) # encoding: [0xc5,0xfa,0x7f,0x00] 612; X86-AVX-NEXT: retl # encoding: [0xc3] 613; 614; X86-AVX512VL-LABEL: test_x86_sse2_storeu_dq: 615; X86-AVX512VL: # %bb.0: 616; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 617; X86-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 618; X86-AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1] 619; X86-AVX512VL-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 620; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 621; 622; X64-AVX-LABEL: test_x86_sse2_storeu_dq: 623; X64-AVX: # %bb.0: 624; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 625; X64-AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1] 626; X64-AVX-NEXT: vmovdqu %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x07] 627; X64-AVX-NEXT: retq # encoding: [0xc3] 628; 629; X64-AVX512VL-LABEL: test_x86_sse2_storeu_dq: 630; X64-AVX512VL: # %bb.0: 631; X64-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9] 632; X64-AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1] 633; X64-AVX512VL-NEXT: vmovdqu %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07] 634; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 635 %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 636 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2) 637 ret void 638} 639declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) 
nounwind 640 641 642define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { 643 ; fadd operation forces the execution domain. 644; X86-AVX-LABEL: test_x86_sse2_storeu_pd: 645; X86-AVX: # %bb.0: 646; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 647; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9] 648; X86-AVX-NEXT: vmovhpd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] 649; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 650; X86-AVX-NEXT: # xmm1 = xmm1[0],mem[0] 651; X86-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1] 652; X86-AVX-NEXT: vmovupd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x11,0x00] 653; X86-AVX-NEXT: retl # encoding: [0xc3] 654; 655; X86-AVX512VL-LABEL: test_x86_sse2_storeu_pd: 656; X86-AVX512VL: # %bb.0: 657; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 658; X86-AVX512VL-NEXT: vmovsd {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A] 659; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 660; X86-AVX512VL-NEXT: # xmm1 = mem[0],zero 661; X86-AVX512VL-NEXT: vpslldq $8, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08] 662; X86-AVX512VL-NEXT: # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] 663; X86-AVX512VL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1] 664; X86-AVX512VL-NEXT: vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00] 665; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 666; 667; X64-AVX-LABEL: test_x86_sse2_storeu_pd: 668; X64-AVX: # %bb.0: 669; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9] 670; X64-AVX-NEXT: vmovhpd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] 671; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: 
reloc_riprel_4byte 672; X64-AVX-NEXT: # xmm1 = xmm1[0],mem[0] 673; X64-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1] 674; X64-AVX-NEXT: vmovupd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x11,0x07] 675; X64-AVX-NEXT: retq # encoding: [0xc3] 676; 677; X64-AVX512VL-LABEL: test_x86_sse2_storeu_pd: 678; X64-AVX512VL: # %bb.0: 679; X64-AVX512VL-NEXT: vmovsd {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A] 680; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte 681; X64-AVX512VL-NEXT: # xmm1 = mem[0],zero 682; X64-AVX512VL-NEXT: vpslldq $8, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08] 683; X64-AVX512VL-NEXT: # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] 684; X64-AVX512VL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1] 685; X64-AVX512VL-NEXT: vmovupd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07] 686; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 687 %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> 688 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) 689 ret void 690} 691declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind 692 693 694define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { 695; X86-AVX-LABEL: test_x86_sse_storeu_ps: 696; X86-AVX: # %bb.0: 697; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 698; X86-AVX-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 699; X86-AVX-NEXT: retl # encoding: [0xc3] 700; 701; X86-AVX512VL-LABEL: test_x86_sse_storeu_ps: 702; X86-AVX512VL: # %bb.0: 703; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 704; X86-AVX512VL-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 705; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 706; 707; X64-AVX-LABEL: 
test_x86_sse_storeu_ps: 708; X64-AVX: # %bb.0: 709; X64-AVX-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 710; X64-AVX-NEXT: retq # encoding: [0xc3] 711; 712; X64-AVX512VL-LABEL: test_x86_sse_storeu_ps: 713; X64-AVX512VL: # %bb.0: 714; X64-AVX512VL-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 715; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 716 call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) 717 ret void 718} 719declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind 720 721 722define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { 723 ; FIXME: unfortunately, in the AVX-only runs the execution domain fix pass changes this store to vmovups, and it's hard to force the integer domain with no 256-bit integer instructions 724 ; add operation forces the execution domain. 725; X86-AVX-LABEL: test_x86_avx_storeu_dq_256: 726; X86-AVX: # %bb.0: 727; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 728; X86-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] 729; X86-AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2] 730; X86-AVX-NEXT: vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca] 731; X86-AVX-NEXT: vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2] 732; X86-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 733; X86-AVX-NEXT: vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00] 734; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 735; X86-AVX-NEXT: retl # encoding: [0xc3] 736; 737; X86-AVX512VL-LABEL: test_x86_avx_storeu_dq_256: 738; X86-AVX512VL: # %bb.0: 739; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 740; X86-AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9] 741; X86-AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1] 742; X86-AVX512VL-NEXT: vmovdqu %ymm0, (%eax) # EVEX
TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 743; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 744; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 745; 746; X64-AVX-LABEL: test_x86_avx_storeu_dq_256: 747; X64-AVX: # %bb.0: 748; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] 749; X64-AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2] 750; X64-AVX-NEXT: vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca] 751; X64-AVX-NEXT: vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2] 752; X64-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] 753; X64-AVX-NEXT: vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07] 754; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 755; X64-AVX-NEXT: retq # encoding: [0xc3] 756; 757; X64-AVX512VL-LABEL: test_x86_avx_storeu_dq_256: 758; X64-AVX512VL: # %bb.0: 759; X64-AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9] 760; X64-AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1] 761; X64-AVX512VL-NEXT: vmovdqu %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x07] 762; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 763; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 764 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 765 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2) 766 ret void 767} 768declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind 769 770 771define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { 772 ; fadd operation forces the execution domain.
773; X86-AVX-LABEL: test_x86_avx_storeu_pd_256: 774; X86-AVX: # %bb.0: 775; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 776; X86-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9] 777; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1] 778; X86-AVX-NEXT: vmovupd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x11,0x00] 779; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 780; X86-AVX-NEXT: retl # encoding: [0xc3] 781; 782; X86-AVX512VL-LABEL: test_x86_avx_storeu_pd_256: 783; X86-AVX512VL: # %bb.0: 784; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 785; X86-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9] 786; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] 787; X86-AVX512VL-NEXT: vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00] 788; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 789; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 790; 791; X64-AVX-LABEL: test_x86_avx_storeu_pd_256: 792; X64-AVX: # %bb.0: 793; X64-AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9] 794; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1] 795; X64-AVX-NEXT: vmovupd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x11,0x07] 796; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 797; X64-AVX-NEXT: retq # encoding: [0xc3] 798; 799; X64-AVX512VL-LABEL: test_x86_avx_storeu_pd_256: 800; X64-AVX512VL: # %bb.0: 801; X64-AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9] 802; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] 803; X64-AVX512VL-NEXT: vmovupd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x07] 804; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 805; 
X64-AVX512VL-NEXT: retq # encoding: [0xc3] 806 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> 807 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2) 808 ret void 809} 810declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind 811 812 813define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { 814; X86-AVX-LABEL: test_x86_avx_storeu_ps_256: 815; X86-AVX: # %bb.0: 816; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 817; X86-AVX-NEXT: vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00] 818; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 819; X86-AVX-NEXT: retl # encoding: [0xc3] 820; 821; X86-AVX512VL-LABEL: test_x86_avx_storeu_ps_256: 822; X86-AVX512VL: # %bb.0: 823; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 824; X86-AVX512VL-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00] 825; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 826; X86-AVX512VL-NEXT: retl # encoding: [0xc3] 827; 828; X64-AVX-LABEL: test_x86_avx_storeu_ps_256: 829; X64-AVX: # %bb.0: 830; X64-AVX-NEXT: vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07] 831; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 832; X64-AVX-NEXT: retq # encoding: [0xc3] 833; 834; X64-AVX512VL-LABEL: test_x86_avx_storeu_ps_256: 835; X64-AVX512VL: # %bb.0: 836; X64-AVX512VL-NEXT: vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07] 837; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 838; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 839 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) 840 ret void 841} 842declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind 843 844 845define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) { 846; AVX-LABEL: test_x86_avx_vpermil_pd: 847; AVX: # %bb.0: 848; AVX-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: 
[0xc4,0xe3,0x79,0x05,0xc0,0x01] 849; AVX-NEXT: # xmm0 = xmm0[1,0] 850; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 851; 852; AVX512VL-LABEL: test_x86_avx_vpermil_pd: 853; AVX512VL: # %bb.0: 854; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 855; AVX512VL-NEXT: # xmm0 = xmm0[1,0] 856; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 857 %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1] 858 ret <2 x double> %res 859} 860declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone 861 862 863define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) { 864; AVX-LABEL: test_x86_avx_vpermil_pd_256: 865; AVX: # %bb.0: 866; AVX-NEXT: vpermilpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07] 867; AVX-NEXT: # ymm0 = ymm0[1,1,3,2] 868; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 869; 870; AVX512VL-LABEL: test_x86_avx_vpermil_pd_256: 871; AVX512VL: # %bb.0: 872; AVX512VL-NEXT: vpermilpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07] 873; AVX512VL-NEXT: # ymm0 = ymm0[1,1,3,2] 874; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 875 %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1] 876 ret <4 x double> %res 877} 878declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone 879 880 881define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { 882; AVX-LABEL: test_x86_avx_vpermil_ps: 883; AVX: # %bb.0: 884; AVX-NEXT: vpermilps $7, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07] 885; AVX-NEXT: # xmm0 = xmm0[3,1,0,0] 886; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 887; 888; AVX512VL-LABEL: test_x86_avx_vpermil_ps: 889; AVX512VL: # %bb.0: 890; AVX512VL-NEXT: vpermilps $7, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07] 891; AVX512VL-NEXT: # xmm0 = xmm0[3,1,0,0] 892; 
AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 893 %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] 894 ret <4 x float> %res 895} 896declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone 897 898 899define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) { 900; AVX-LABEL: test_x86_avx_vpermil_ps_256: 901; AVX: # %bb.0: 902; AVX-NEXT: vpermilps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07] 903; AVX-NEXT: # ymm0 = ymm0[3,1,0,0,7,5,4,4] 904; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 905; 906; AVX512VL-LABEL: test_x86_avx_vpermil_ps_256: 907; AVX512VL: # %bb.0: 908; AVX512VL-NEXT: vpermilps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07] 909; AVX512VL-NEXT: # ymm0 = ymm0[3,1,0,0,7,5,4,4] 910; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 911 %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1] 912 ret <8 x float> %res 913} 914declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone 915 916 917define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) { 918; AVX-LABEL: test_x86_avx_vperm2f128_pd_256: 919; AVX: # %bb.0: 920; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 921; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 922; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 923; 924; AVX512VL-LABEL: test_x86_avx_vperm2f128_pd_256: 925; AVX512VL: # %bb.0: 926; AVX512VL-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 927; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 928; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 929 %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1] 930 ret <4 x double> %res 931} 932declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x 
double>, i8) nounwind readnone 933 934 935define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) { 936; AVX-LABEL: test_x86_avx_vperm2f128_ps_256: 937; AVX: # %bb.0: 938; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 939; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 940; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 941; 942; AVX512VL-LABEL: test_x86_avx_vperm2f128_ps_256: 943; AVX512VL: # %bb.0: 944; AVX512VL-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 945; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 946; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 947 %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1] 948 ret <8 x float> %res 949} 950declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 951 952 953define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) { 954; AVX-LABEL: test_x86_avx_vperm2f128_si_256: 955; AVX: # %bb.0: 956; AVX-NEXT: vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21] 957; AVX-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 958; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 959; 960; AVX512VL-LABEL: test_x86_avx_vperm2f128_si_256: 961; AVX512VL: # %bb.0: 962; AVX512VL-NEXT: vperm2i128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x46,0xc0,0x21] 963; AVX512VL-NEXT: # ymm0 = ymm1[2,3],ymm0[0,1] 964; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 965 %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1] 966 ret <8 x i32> %res 967} 968declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone 969 970 971define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) { 972; AVX-LABEL: test_x86_avx_cvtdq2_ps_256: 973; AVX: # %bb.0: 974; AVX-NEXT: 
vcvtdq2ps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5b,0xc0] 975; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 976; 977; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256: 978; AVX512VL: # %bb.0: 979; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0] 980; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] 981 %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] 982 ret <8 x float> %res 983} 984declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone 985