; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; Checks the instruction selected, and its machine-code encoding, for each
; llvm.x86.sse3.* intrinsic called below. Every llc invocation above pairs one
; Darwin triple (i386 or x86_64) with one feature set (+sse3, +avx, or the
; AVX512 group), and passes FileCheck a list of prefixes:
;   - the SSE and AVX prefixes are shared by the 32-bit and 64-bit invocations,
;     which is why their return check is written as ret{{[l|q]}};
;   - the X86 and X64 prefixes are shared by all feature sets of one triple;
;   - the combined prefixes (e.g. X86-SSE, X64-AVX) are used where the expected
;     output depends on both the triple and the feature set.

; addsub.pd: expects ADDSUBPD with SSE3 only, and the three-operand VEX form
; VADDSUBPD when AVX is enabled.
define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse3_addsub_pd:
; SSE: ## %bb.0:
; SSE-NEXT: addsubpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd0,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse3_addsub_pd:
; AVX: ## %bb.0:
; AVX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd0,0xc1]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone


; addsub.ps: expects ADDSUBPS with SSE3 only, VADDSUBPS when AVX is enabled.
define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse3_addsub_ps:
; SSE: ## %bb.0:
; SSE-NEXT: addsubps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0xd0,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse3_addsub_ps:
; AVX: ## %bb.0:
; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xd0,0xc1]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone


; hadd.pd: expects HADDPD with SSE3 only, VHADDPD when AVX is enabled.
define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse3_hadd_pd:
; SSE: ## %bb.0:
; SSE-NEXT: haddpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x7c,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse3_hadd_pd:
; AVX: ## %bb.0:
; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x7c,0xc1]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone


; hadd.ps: expects HADDPS with SSE3 only, VHADDPS when AVX is enabled.
define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse3_hadd_ps:
; SSE: ## %bb.0:
; SSE-NEXT: haddps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x7c,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse3_hadd_ps:
; AVX: ## %bb.0:
; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x7c,0xc1]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone


; hsub.pd: expects HSUBPD with SSE3 only, VHSUBPD when AVX is enabled.
define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse3_hsub_pd:
; SSE: ## %bb.0:
; SSE-NEXT: hsubpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x7d,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse3_hsub_pd:
; AVX: ## %bb.0:
; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x7d,0xc1]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone


; hsub.ps: expects HSUBPS with SSE3 only, VHSUBPS when AVX is enabled.
define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse3_hsub_ps:
; SSE: ## %bb.0:
; SSE-NEXT: hsubps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x7d,0xc1]
; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse3_hsub_ps:
; AVX: ## %bb.0:
; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x7d,0xc1]
; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone


; ldu.dq: expects LDDQU with SSE3 only, VLDDQU when AVX is enabled. The checks
; are split per triple because the expected address operand differs: the 32-bit
; output first loads the pointer from the stack into %eax, while the 64-bit
; output uses %rdi directly.
define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
; X86-SSE-LABEL: test_x86_sse3_ldu_dq:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: lddqu (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xf0,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX-LABEL: test_x86_sse3_ldu_dq:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vlddqu (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xf0,0x00]
; X86-AVX-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse3_ldu_dq:
; X64-SSE: ## %bb.0:
; X64-SSE-NEXT: lddqu (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0xf0,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_sse3_ldu_dq:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vlddqu (%rdi), %xmm0 ## encoding: [0xc5,0xfb,0xf0,0x07]
; X64-AVX-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly

; Make sure that instructions which have no AVX equivalents, but are associated
; with SSEX feature flags, still work.

; monitor: the checks use only the per-triple prefixes, so the same output is
; expected under every feature set. The pointer is expected in %eax/%rax (via
; lea), with the other operands in %ecx and %edx, before the MONITOR
; instruction.
define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
; X86-LABEL: monitor:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: leal (%eax), %eax ## encoding: [0x8d,0x00]
; X86-NEXT: monitor ## encoding: [0x0f,0x01,0xc8]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: monitor:
; X64: ## %bb.0:
; X64-NEXT: leaq (%rdi), %rax ## encoding: [0x48,0x8d,0x07]
; X64-NEXT: movl %esi, %ecx ## encoding: [0x89,0xf1]
; X64-NEXT: monitor ## encoding: [0x0f,0x01,0xc8]
; X64-NEXT: retq ## encoding: [0xc3]
  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind

; mwait: as with monitor, only the per-triple prefixes are used. The checks
; expect the first operand (%E) in %ecx and the second (%H) in %eax before the
; MWAIT instruction.
define void @mwait(i32 %E, i32 %H) nounwind {
; X86-LABEL: mwait:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: mwait ## encoding: [0x0f,0x01,0xc9]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: mwait:
; X64: ## %bb.0:
; X64-NEXT: movl %edi, %ecx ## encoding: [0x89,0xf9]
; X64-NEXT: movl %esi, %eax ## encoding: [0x89,0xf0]
; X64-NEXT: mwait ## encoding: [0x0f,0x01,0xc9]
; X64-NEXT: retq ## encoding: [0xc3]
  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32) nounwind