1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE 3; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX1,X86-AVX1 4; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX512,X86-AVX512 5; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE 6; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX1,X64-AVX1 7; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX512,X64-AVX512 8 9 10define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { 11; SSE-LABEL: test_x86_sse2_sqrt_pd: 12; SSE: ## %bb.0: 13; SSE-NEXT: sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0] 14; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 15; 16; AVX1-LABEL: test_x86_sse2_sqrt_pd: 17; AVX1: ## %bb.0: 18; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0] 19; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 20; 21; AVX512-LABEL: test_x86_sse2_sqrt_pd: 22; AVX512: ## %bb.0: 23; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0] 24; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 25 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] 26 ret <2 x double> %res 27} 28declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 29 30 31define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { 32; SSE-LABEL: test_x86_sse2_sqrt_sd: 33; SSE: ## %bb.0: 34; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## 
encoding: [0xf2,0x0f,0x51,0xc0] 35; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 36; 37; AVX1-LABEL: test_x86_sse2_sqrt_sd: 38; AVX1: ## %bb.0: 39; AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] 40; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 41; 42; AVX512-LABEL: test_x86_sse2_sqrt_sd: 43; AVX512: ## %bb.0: 44; AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 45; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 46 %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] 47 ret <2 x double> %res 48} 49declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 50 51 52define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) { 53; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load: 54; X86-SSE: ## %bb.0: 55; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 56; X86-SSE-NEXT: movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00] 57; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0] 58; X86-SSE-NEXT: retl ## encoding: [0xc3] 59; 60; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load: 61; X86-AVX1: ## %bb.0: 62; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 63; X86-AVX1-NEXT: vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00] 64; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] 65; X86-AVX1-NEXT: retl ## encoding: [0xc3] 66; 67; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load: 68; X86-AVX512: ## %bb.0: 69; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 70; X86-AVX512-NEXT: vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00] 71; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 72; X86-AVX512-NEXT: retl ## encoding: [0xc3] 73; 74; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load: 75; X64-SSE: ## %bb.0: 76; 
X64-SSE-NEXT: movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07] 77; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0] 78; X64-SSE-NEXT: retq ## encoding: [0xc3] 79; 80; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load: 81; X64-AVX1: ## %bb.0: 82; X64-AVX1-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07] 83; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] 84; X64-AVX1-NEXT: retq ## encoding: [0xc3] 85; 86; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load: 87; X64-AVX512: ## %bb.0: 88; X64-AVX512-NEXT: vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07] 89; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 90; X64-AVX512-NEXT: retq ## encoding: [0xc3] 91 %a1 = load <2 x double>, <2 x double>* %a0, align 16 92 %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1] 93 ret <2 x double> %res 94} 95 96 97define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { 98; SSE-LABEL: test_x86_sse2_psll_dq_bs: 99; SSE: ## %bb.0: 100; SSE-NEXT: pslldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x07] 101; SSE-NEXT: ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8] 102; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 103; 104; AVX1-LABEL: test_x86_sse2_psll_dq_bs: 105; AVX1: ## %bb.0: 106; AVX1-NEXT: vpslldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x07] 107; AVX1-NEXT: ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8] 108; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 109; 110; AVX512-LABEL: test_x86_sse2_psll_dq_bs: 111; AVX512: ## %bb.0: 112; AVX512-NEXT: vpslldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x07] 113; AVX512-NEXT: ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8] 114; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 115 %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; 
<<2 x i64>> [#uses=1] 116 ret <2 x i64> %res 117} 118declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone 119 120 121define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) { 122; SSE-LABEL: test_x86_sse2_psrl_dq_bs: 123; SSE: ## %bb.0: 124; SSE-NEXT: psrldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x07] 125; SSE-NEXT: ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero 126; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 127; 128; AVX1-LABEL: test_x86_sse2_psrl_dq_bs: 129; AVX1: ## %bb.0: 130; AVX1-NEXT: vpsrldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x07] 131; AVX1-NEXT: ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero 132; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 133; 134; AVX512-LABEL: test_x86_sse2_psrl_dq_bs: 135; AVX512: ## %bb.0: 136; AVX512-NEXT: vpsrldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x07] 137; AVX512-NEXT: ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero 138; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 139 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] 140 ret <2 x i64> %res 141} 142declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone 143 144define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) { 145; SSE-LABEL: test_x86_sse2_psll_dq: 146; SSE: ## %bb.0: 147; SSE-NEXT: pslldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x01] 148; SSE-NEXT: ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 149; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 150; 151; AVX1-LABEL: test_x86_sse2_psll_dq: 152; AVX1: ## %bb.0: 153; AVX1-NEXT: vpslldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x01] 154; AVX1-NEXT: ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 155; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 156; 157; AVX512-LABEL: test_x86_sse2_psll_dq: 158; AVX512: ## %bb.0: 159; AVX512-NEXT: vpslldq $1, %xmm0, %xmm0 ## EVEX 
TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01] 160; AVX512-NEXT: ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 161; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 162 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1] 163 ret <2 x i64> %res 164} 165declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone 166 167 168define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) { 169; SSE-LABEL: test_x86_sse2_psrl_dq: 170; SSE: ## %bb.0: 171; SSE-NEXT: psrldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x01] 172; SSE-NEXT: ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 173; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 174; 175; AVX1-LABEL: test_x86_sse2_psrl_dq: 176; AVX1: ## %bb.0: 177; AVX1-NEXT: vpsrldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x01] 178; AVX1-NEXT: ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 179; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 180; 181; AVX512-LABEL: test_x86_sse2_psrl_dq: 182; AVX512: ## %bb.0: 183; AVX512-NEXT: vpsrldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01] 184; AVX512-NEXT: ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 185; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 186 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1] 187 ret <2 x i64> %res 188} 189declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone 190 191 192define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { 193; SSE-LABEL: test_x86_sse2_cvtdq2pd: 194; SSE: ## %bb.0: 195; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0xe6,0xc0] 196; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 197; 198; AVX1-LABEL: test_x86_sse2_cvtdq2pd: 199; AVX1: ## %bb.0: 200; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xe6,0xc0] 201; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 202; 203; AVX512-LABEL: test_x86_sse2_cvtdq2pd: 204; AVX512: ## %bb.0: 205; 
AVX512-NEXT: vcvtdq2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0] 206; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 207 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1] 208 ret <2 x double> %res 209} 210declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone 211 212 213define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { 214; SSE-LABEL: test_x86_sse2_cvtps2pd: 215; SSE: ## %bb.0: 216; SSE-NEXT: cvtps2pd %xmm0, %xmm0 ## encoding: [0x0f,0x5a,0xc0] 217; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 218; 219; AVX1-LABEL: test_x86_sse2_cvtps2pd: 220; AVX1: ## %bb.0: 221; AVX1-NEXT: vcvtps2pd %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5a,0xc0] 222; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 223; 224; AVX512-LABEL: test_x86_sse2_cvtps2pd: 225; AVX512: ## %bb.0: 226; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0] 227; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 228 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1] 229 ret <2 x double> %res 230} 231declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone 232 233 234define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { 235; X86-SSE-LABEL: test_x86_sse2_storel_dq: 236; X86-SSE: ## %bb.0: 237; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 238; X86-SSE-NEXT: movlps %xmm0, (%eax) ## encoding: [0x0f,0x13,0x00] 239; X86-SSE-NEXT: retl ## encoding: [0xc3] 240; 241; X86-AVX1-LABEL: test_x86_sse2_storel_dq: 242; X86-AVX1: ## %bb.0: 243; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 244; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x13,0x00] 245; X86-AVX1-NEXT: retl ## encoding: [0xc3] 246; 247; X86-AVX512-LABEL: test_x86_sse2_storel_dq: 248; X86-AVX512: ## %bb.0: 249; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: 
[0x8b,0x44,0x24,0x04] 250; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 251; X86-AVX512-NEXT: retl ## encoding: [0xc3] 252; 253; X64-SSE-LABEL: test_x86_sse2_storel_dq: 254; X64-SSE: ## %bb.0: 255; X64-SSE-NEXT: movlps %xmm0, (%rdi) ## encoding: [0x0f,0x13,0x07] 256; X64-SSE-NEXT: retq ## encoding: [0xc3] 257; 258; X64-AVX1-LABEL: test_x86_sse2_storel_dq: 259; X64-AVX1: ## %bb.0: 260; X64-AVX1-NEXT: vmovlps %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x13,0x07] 261; X64-AVX1-NEXT: retq ## encoding: [0xc3] 262; 263; X64-AVX512-LABEL: test_x86_sse2_storel_dq: 264; X64-AVX512: ## %bb.0: 265; X64-AVX512-NEXT: vmovlps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07] 266; X64-AVX512-NEXT: retq ## encoding: [0xc3] 267 call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) 268 ret void 269} 270declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind 271 272 273define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { 274 ; add operation forces the execution domain. 
275; X86-SSE-LABEL: test_x86_sse2_storeu_dq: 276; X86-SSE: ## %bb.0: 277; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 278; X86-SSE-NEXT: pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9] 279; X86-SSE-NEXT: psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1] 280; X86-SSE-NEXT: movdqu %xmm0, (%eax) ## encoding: [0xf3,0x0f,0x7f,0x00] 281; X86-SSE-NEXT: retl ## encoding: [0xc3] 282; 283; X86-AVX1-LABEL: test_x86_sse2_storeu_dq: 284; X86-AVX1: ## %bb.0: 285; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 286; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9] 287; X86-AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1] 288; X86-AVX1-NEXT: vmovdqu %xmm0, (%eax) ## encoding: [0xc5,0xfa,0x7f,0x00] 289; X86-AVX1-NEXT: retl ## encoding: [0xc3] 290; 291; X86-AVX512-LABEL: test_x86_sse2_storeu_dq: 292; X86-AVX512: ## %bb.0: 293; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 294; X86-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9] 295; X86-AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1] 296; X86-AVX512-NEXT: vmovdqu %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 297; X86-AVX512-NEXT: retl ## encoding: [0xc3] 298; 299; X64-SSE-LABEL: test_x86_sse2_storeu_dq: 300; X64-SSE: ## %bb.0: 301; X64-SSE-NEXT: pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9] 302; X64-SSE-NEXT: psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1] 303; X64-SSE-NEXT: movdqu %xmm0, (%rdi) ## encoding: [0xf3,0x0f,0x7f,0x07] 304; X64-SSE-NEXT: retq ## encoding: [0xc3] 305; 306; X64-AVX1-LABEL: test_x86_sse2_storeu_dq: 307; X64-AVX1: ## %bb.0: 308; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9] 309; X64-AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1] 310; X64-AVX1-NEXT: vmovdqu %xmm0, (%rdi) 
## encoding: [0xc5,0xfa,0x7f,0x07] 311; X64-AVX1-NEXT: retq ## encoding: [0xc3] 312; 313; X64-AVX512-LABEL: test_x86_sse2_storeu_dq: 314; X64-AVX512: ## %bb.0: 315; X64-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9] 316; X64-AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1] 317; X64-AVX512-NEXT: vmovdqu %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07] 318; X64-AVX512-NEXT: retq ## encoding: [0xc3] 319 %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 320 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2) 321 ret void 322} 323declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind 324 325 326define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { 327 ; fadd operation forces the execution domain. 328; X86-SSE-LABEL: test_x86_sse2_storeu_pd: 329; X86-SSE: ## %bb.0: 330; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 331; X86-SSE-NEXT: xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9] 332; X86-SSE-NEXT: movhpd LCPI11_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A] 333; X86-SSE-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4 334; X86-SSE-NEXT: ## xmm1 = xmm1[0],mem[0] 335; X86-SSE-NEXT: addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8] 336; X86-SSE-NEXT: movupd %xmm1, (%eax) ## encoding: [0x66,0x0f,0x11,0x08] 337; X86-SSE-NEXT: retl ## encoding: [0xc3] 338; 339; X86-AVX1-LABEL: test_x86_sse2_storeu_pd: 340; X86-AVX1: ## %bb.0: 341; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 342; X86-AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9] 343; X86-AVX1-NEXT: vmovhpd LCPI11_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] 344; X86-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4 345; X86-AVX1-NEXT: ## xmm1 = xmm1[0],mem[0] 346; 
X86-AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1] 347; X86-AVX1-NEXT: vmovupd %xmm0, (%eax) ## encoding: [0xc5,0xf9,0x11,0x00] 348; X86-AVX1-NEXT: retl ## encoding: [0xc3] 349; 350; X86-AVX512-LABEL: test_x86_sse2_storeu_pd: 351; X86-AVX512: ## %bb.0: 352; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] 353; X86-AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9] 354; X86-AVX512-NEXT: vmovhpd LCPI11_0, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] 355; X86-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4 356; X86-AVX512-NEXT: ## xmm1 = xmm1[0],mem[0] 357; X86-AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1] 358; X86-AVX512-NEXT: vmovupd %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00] 359; X86-AVX512-NEXT: retl ## encoding: [0xc3] 360; 361; X64-SSE-LABEL: test_x86_sse2_storeu_pd: 362; X64-SSE: ## %bb.0: 363; X64-SSE-NEXT: xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9] 364; X64-SSE-NEXT: movhpd {{.*}}(%rip), %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A] 365; X64-SSE-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte 366; X64-SSE-NEXT: ## xmm1 = xmm1[0],mem[0] 367; X64-SSE-NEXT: addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8] 368; X64-SSE-NEXT: movupd %xmm1, (%rdi) ## encoding: [0x66,0x0f,0x11,0x0f] 369; X64-SSE-NEXT: retq ## encoding: [0xc3] 370; 371; X64-AVX1-LABEL: test_x86_sse2_storeu_pd: 372; X64-AVX1: ## %bb.0: 373; X64-AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9] 374; X64-AVX1-NEXT: vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] 375; X64-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte 376; X64-AVX1-NEXT: ## xmm1 = xmm1[0],mem[0] 377; X64-AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: 
[0xc5,0xf9,0x58,0xc1] 378; X64-AVX1-NEXT: vmovupd %xmm0, (%rdi) ## encoding: [0xc5,0xf9,0x11,0x07] 379; X64-AVX1-NEXT: retq ## encoding: [0xc3] 380; 381; X64-AVX512-LABEL: test_x86_sse2_storeu_pd: 382; X64-AVX512: ## %bb.0: 383; X64-AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9] 384; X64-AVX512-NEXT: vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] 385; X64-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte 386; X64-AVX512-NEXT: ## xmm1 = xmm1[0],mem[0] 387; X64-AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1] 388; X64-AVX512-NEXT: vmovupd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07] 389; X64-AVX512-NEXT: retq ## encoding: [0xc3] 390 %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> 391 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) 392 ret void 393} 394declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind 395 396define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) { 397; SSE-LABEL: test_x86_sse2_pshuf_d: 398; SSE: ## %bb.0: ## %entry 399; SSE-NEXT: pshufd $27, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x70,0xc0,0x1b] 400; SSE-NEXT: ## xmm0 = xmm0[3,2,1,0] 401; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 402; 403; AVX1-LABEL: test_x86_sse2_pshuf_d: 404; AVX1: ## %bb.0: ## %entry 405; AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 406; AVX1-NEXT: ## xmm0 = xmm0[3,2,1,0] 407; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 408; 409; AVX512-LABEL: test_x86_sse2_pshuf_d: 410; AVX512: ## %bb.0: ## %entry 411; AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 412; AVX512-NEXT: ## xmm0 = xmm0[3,2,1,0] 413; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 414entry: 415 %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) 
nounwind readnone 416 ret <4 x i32> %res 417} 418declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone 419 420define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) { 421; SSE-LABEL: test_x86_sse2_pshufl_w: 422; SSE: ## %bb.0: ## %entry 423; SSE-NEXT: pshuflw $27, %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x70,0xc0,0x1b] 424; SSE-NEXT: ## xmm0 = xmm0[3,2,1,0,4,5,6,7] 425; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 426; 427; AVX1-LABEL: test_x86_sse2_pshufl_w: 428; AVX1: ## %bb.0: ## %entry 429; AVX1-NEXT: vpshuflw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x70,0xc0,0x1b] 430; AVX1-NEXT: ## xmm0 = xmm0[3,2,1,0,4,5,6,7] 431; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 432; 433; AVX512-LABEL: test_x86_sse2_pshufl_w: 434; AVX512: ## %bb.0: ## %entry 435; AVX512-NEXT: vpshuflw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x1b] 436; AVX512-NEXT: ## xmm0 = xmm0[3,2,1,0,4,5,6,7] 437; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 438entry: 439 %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone 440 ret <8 x i16> %res 441} 442declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone 443 444define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) { 445; SSE-LABEL: test_x86_sse2_pshufh_w: 446; SSE: ## %bb.0: ## %entry 447; SSE-NEXT: pshufhw $27, %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x70,0xc0,0x1b] 448; SSE-NEXT: ## xmm0 = xmm0[0,1,2,3,7,6,5,4] 449; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 450; 451; AVX1-LABEL: test_x86_sse2_pshufh_w: 452; AVX1: ## %bb.0: ## %entry 453; AVX1-NEXT: vpshufhw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x70,0xc0,0x1b] 454; AVX1-NEXT: ## xmm0 = xmm0[0,1,2,3,7,6,5,4] 455; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 456; 457; AVX512-LABEL: test_x86_sse2_pshufh_w: 458; AVX512: ## %bb.0: ## %entry 459; AVX512-NEXT: vpshufhw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x1b] 460; AVX512-NEXT: ## xmm0 = xmm0[0,1,2,3,7,6,5,4] 461; 
AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 462entry: 463 %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone 464 ret <8 x i16> %res 465} 466declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone 467 468define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) { 469; SSE-LABEL: max_epu8: 470; SSE: ## %bb.0: 471; SSE-NEXT: pmaxub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xde,0xc1] 472; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 473; 474; AVX1-LABEL: max_epu8: 475; AVX1: ## %bb.0: 476; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xde,0xc1] 477; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 478; 479; AVX512-LABEL: max_epu8: 480; AVX512: ## %bb.0: 481; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1] 482; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 483 %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) 484 ret <16 x i8> %res 485} 486declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone 487 488define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) { 489; SSE-LABEL: min_epu8: 490; SSE: ## %bb.0: 491; SSE-NEXT: pminub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xda,0xc1] 492; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 493; 494; AVX1-LABEL: min_epu8: 495; AVX1: ## %bb.0: 496; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xda,0xc1] 497; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 498; 499; AVX512-LABEL: min_epu8: 500; AVX512: ## %bb.0: 501; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1] 502; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 503 %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) 504 ret <16 x i8> %res 505} 506declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone 507 508define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) { 509; SSE-LABEL: max_epi16: 510; SSE: ## %bb.0: 511; 
SSE-NEXT: pmaxsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xee,0xc1] 512; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 513; 514; AVX1-LABEL: max_epi16: 515; AVX1: ## %bb.0: 516; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xee,0xc1] 517; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 518; 519; AVX512-LABEL: max_epi16: 520; AVX512: ## %bb.0: 521; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1] 522; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 523 %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) 524 ret <8 x i16> %res 525} 526declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone 527 528define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) { 529; SSE-LABEL: min_epi16: 530; SSE: ## %bb.0: 531; SSE-NEXT: pminsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xea,0xc1] 532; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 533; 534; AVX1-LABEL: min_epi16: 535; AVX1: ## %bb.0: 536; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xea,0xc1] 537; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 538; 539; AVX512-LABEL: min_epi16: 540; AVX512: ## %bb.0: 541; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1] 542; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 543 %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) 544 ret <8 x i16> %res 545} 546declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone 547 548define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { 549; SSE-LABEL: test_x86_sse2_add_sd: 550; SSE: ## %bb.0: 551; SSE-NEXT: addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1] 552; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 553; 554; AVX1-LABEL: test_x86_sse2_add_sd: 555; AVX1: ## %bb.0: 556; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1] 557; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 558; 559; AVX512-LABEL: 
test_x86_sse2_add_sd: 560; AVX512: ## %bb.0: 561; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1] 562; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 563 %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 564 ret <2 x double> %res 565} 566declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone 567 568 569define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { 570; SSE-LABEL: test_x86_sse2_sub_sd: 571; SSE: ## %bb.0: 572; SSE-NEXT: subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1] 573; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 574; 575; AVX1-LABEL: test_x86_sse2_sub_sd: 576; AVX1: ## %bb.0: 577; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1] 578; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 579; 580; AVX512-LABEL: test_x86_sse2_sub_sd: 581; AVX512: ## %bb.0: 582; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1] 583; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 584 %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 585 ret <2 x double> %res 586} 587declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone 588 589 590define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { 591; SSE-LABEL: test_x86_sse2_mul_sd: 592; SSE: ## %bb.0: 593; SSE-NEXT: mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1] 594; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 595; 596; AVX1-LABEL: test_x86_sse2_mul_sd: 597; AVX1: ## %bb.0: 598; AVX1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1] 599; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 600; 601; AVX512-LABEL: test_x86_sse2_mul_sd: 602; AVX512: ## %bb.0: 603; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1] 604; 
AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 605 %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 606 ret <2 x double> %res 607} 608declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 609 610 611define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { 612; SSE-LABEL: test_x86_sse2_div_sd: 613; SSE: ## %bb.0: 614; SSE-NEXT: divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1] 615; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 616; 617; AVX1-LABEL: test_x86_sse2_div_sd: 618; AVX1: ## %bb.0: 619; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1] 620; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 621; 622; AVX512-LABEL: test_x86_sse2_div_sd: 623; AVX512: ## %bb.0: 624; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1] 625; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 626 %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 627 ret <2 x double> %res 628} 629declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone 630 631 632define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) { 633; SSE-LABEL: test_x86_sse2_pmulu_dq: 634; SSE: ## %bb.0: 635; SSE-NEXT: pmuludq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf4,0xc1] 636; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 637; 638; AVX1-LABEL: test_x86_sse2_pmulu_dq: 639; AVX1: ## %bb.0: 640; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1] 641; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 642; 643; AVX512-LABEL: test_x86_sse2_pmulu_dq: 644; AVX512: ## %bb.0: 645; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1] 646; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] 647 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] 648 
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; X86-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtsi2sd %edi, %xmm0 ## encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtsi2sd %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtss2sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtss2sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x08]
; X86-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovss (%eax), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovss (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movss (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x0f]
; X64-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovss (%rdi), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovss (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x float>* %p1) optsize {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2ps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2ps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_padds_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    paddsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xec,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_padds_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xec,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_padds_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_padds_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    paddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xed,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_padds_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xed,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_padds_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone


; NOTE(review): the paddus/psubus tests below previously carried AVX2-/SKX-prefixed
; check lines; no RUN line uses those prefixes (see --check-prefixes at the top of
; the file), so FileCheck never verified them. They have been removed as stale.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    paddusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdc,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_paddus_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_paddus_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_paddus_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_paddus_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubs_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubs_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubs_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubs_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_b:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd8,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubus_b:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubus_b:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_w:
; SSE:       ## %bb.0:
; SSE-NEXT:    psubusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd9,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psubus_w:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psubus_w:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone