; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c

; Packed add, written as a plain fadd on <4 x float>.
define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ps:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0 # encoding: [0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <4 x float> %a0, %a1
  ret <4 x float> %res
}

; Scalar add of element 0 of both inputs, reinserted into element 0 of %a0.
define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fadd = fadd float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fadd, i32 0
  ret <4 x float> %res
}

; Bitwise and, performed on the <4 x i32> bitcasts of the float inputs.
define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_and_ps:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

; (~%a0) & %a1 on the integer bitcasts; the not is spelled as xor with all-ones.
define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_ps:
; SSE: # %bb.0:
; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

; Packed compare-equal: fcmp oeq, mask sign-extended to <4 x i32> and bitcast back to float.
define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp oeq <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar compare-equal through the llvm.x86.sse.cmp.ss intrinsic with immediate 0.
define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x00]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; Packed greater-or-equal, written as fcmp ole with the operands swapped (%a1, %a0).
define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar greater-or-equal: cmp.ss immediate 2 on swapped operands; the shuffle
; takes element 0 from the compare result and elements 1-3 from %a0.
define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x02]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

; Packed greater-than, written as fcmp olt with the operands swapped (%a1, %a0).
define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar greater-than: cmp.ss immediate 1 on swapped operands, then element 0
; of the result is merged into %a0 by the shuffle.
define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x01]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

; Packed less-or-equal: fcmp ole in source operand order.
define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar less-or-equal: cmp.ss with immediate 2.
define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x02]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2)
  ret <4 x float> %res
}

; Packed less-than: fcmp olt in source operand order.
define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar less-than: cmp.ss with immediate 1.
define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1)
  ret <4 x float> %res
}

; Packed not-equal: fcmp une (unordered or not equal).
define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x04]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp une <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar not-equal: cmp.ss with immediate 4.
define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x04]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}

; Packed not-greater-or-equal, written as fcmp ugt with the operands swapped (%a1, %a0).
define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar not-greater-or-equal: cmp.ss immediate 6 on swapped operands, then
; element 0 of the result is merged into %a0 by the shuffle.
define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x06]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

; Packed not-greater-than, written as fcmp uge with the operands swapped (%a1, %a0).
define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar not-greater-than: cmp.ss immediate 5 on swapped operands, then
; element 0 of the result is merged into %a0 by the shuffle.
define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x05]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

; Packed not-less-or-equal: fcmp ugt in source operand order.
define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar not-less-or-equal: cmp.ss with immediate 6.
define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x06]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6)
  ret <4 x float> %res
}

; Packed not-less-than: fcmp uge in source operand order.
define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar not-less-than: cmp.ss with immediate 5.
define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x05]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5)
  ret <4 x float> %res
}

; Packed ordered compare: fcmp ord.
define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x07]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ord <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar ordered compare: cmp.ss with immediate 7.
define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}

; Packed unordered compare: fcmp uno.
define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x03]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uno <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

; Scalar unordered compare: cmp.ss with immediate 3.
define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x03]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3)
  ret <4 x float> %res
}

; Scalar compare-equal returning i32 via the llvm.x86.sse.comieq.ss intrinsic.
define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

; Scalar greater-or-equal returning i32 via the llvm.x86.sse.comige.ss intrinsic.
define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comige_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

; Scalar greater-than returning i32 via the llvm.x86.sse.comigt.ss intrinsic.
define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

; Scalar less-or-equal returning i32 via the llvm.x86.sse.comile.ss intrinsic;
; the expected code compares with the registers swapped and uses setae.
define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comile_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

; Scalar less-than returning i32 via the llvm.x86.sse.comilt.ss intrinsic;
; the expected code compares with the registers swapped and uses seta.
define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

; Scalar not-equal returning i32 via the llvm.x86.sse.comineq.ss intrinsic.
define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

; Float-to-i32 conversion of element 0 via the llvm.x86.sse.cvtss2si intrinsic.
define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvt_ss2si:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvt_ss2si:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvt_ss2si:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; i32-to-float conversion via the llvm.x86.sse.cvtsi2ss intrinsic; the 32-bit
; targets read the integer from the stack, the 64-bit targets from %edi.
define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsi2ssl %edi, %xmm0 # encoding: [0xf3,0x0f,0x2a,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

; Returns element 0 as a float; the 32-bit targets spill it to the stack and
; reload it with flds, the 64-bit targets expect only a return.
define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtss_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtss_f32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX1-NEXT: popl %eax # encoding: [0x58]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtss_f32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX512-NEXT: popl %eax # encoding: [0x58]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtss_f32:
; X64: # %bb.0:
; X64-NEXT: retq # encoding: [0xc3]
  %res = extractelement <4 x float> %a0, i32 0
  ret float %res
}

; Same intrinsic call as test_mm_cvt_ss2si (llvm.x86.sse.cvtss2si).
define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtss_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}

; Float-to-i32 conversion of element 0 via the llvm.x86.sse.cvttss2si intrinsic.
define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttss_si:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttss_si:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

; Same intrinsic call as test_mm_cvttss_si (llvm.x86.sse.cvttss2si).
define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttss_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttss_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}

define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ps:
; SSE: # %bb.0:
; SSE-NEXT: divps %xmm1, %xmm0 #
encoding: [0x0f,0x5e,0xc1] 871; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 872; 873; AVX1-LABEL: test_mm_div_ps: 874; AVX1: # %bb.0: 875; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5e,0xc1] 876; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 877; 878; AVX512-LABEL: test_mm_div_ps: 879; AVX512: # %bb.0: 880; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1] 881; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 882 %res = fdiv <4 x float> %a0, %a1 883 ret <4 x float> %res 884} 885 886define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 887; SSE-LABEL: test_mm_div_ss: 888; SSE: # %bb.0: 889; SSE-NEXT: divss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5e,0xc1] 890; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 891; 892; AVX1-LABEL: test_mm_div_ss: 893; AVX1: # %bb.0: 894; AVX1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5e,0xc1] 895; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 896; 897; AVX512-LABEL: test_mm_div_ss: 898; AVX512: # %bb.0: 899; AVX512-NEXT: vdivss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1] 900; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 901 %ext0 = extractelement <4 x float> %a0, i32 0 902 %ext1 = extractelement <4 x float> %a1, i32 0 903 %fdiv = fdiv float %ext0, %ext1 904 %res = insertelement <4 x float> %a0, float %fdiv, i32 0 905 ret <4 x float> %res 906} 907 908define i32 @test_MM_GET_EXCEPTION_MASK() nounwind { 909; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: 910; X86-SSE: # %bb.0: 911; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 912; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 913; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 914; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 915; X86-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 916; X86-SSE-NEXT: # imm = 0x1F80 917; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 918; X86-SSE-NEXT: retl # encoding: [0xc3] 919; 920; 
X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: 921; X86-AVX: # %bb.0: 922; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 923; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 924; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 925; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 926; X86-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 927; X86-AVX-NEXT: # imm = 0x1F80 928; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 929; X86-AVX-NEXT: retl # encoding: [0xc3] 930; 931; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: 932; X64-SSE: # %bb.0: 933; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 934; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 935; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 936; X64-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 937; X64-SSE-NEXT: # imm = 0x1F80 938; X64-SSE-NEXT: retq # encoding: [0xc3] 939; 940; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: 941; X64-AVX: # %bb.0: 942; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 943; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 944; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 945; X64-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 946; X64-AVX-NEXT: # imm = 0x1F80 947; X64-AVX-NEXT: retq # encoding: [0xc3] 948 %1 = alloca i32, align 4 949 %2 = bitcast i32* %1 to i8* 950 call void @llvm.x86.sse.stmxcsr(i8* %2) 951 %3 = load i32, i32* %1, align 4 952 %4 = and i32 %3, 8064 953 ret i32 %4 954} 955declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone 956 957define i32 @test_MM_GET_EXCEPTION_STATE() nounwind { 958; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE: 959; X86-SSE: # %bb.0: 960; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 961; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 962; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 963; 
X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 964; X86-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 965; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 966; X86-SSE-NEXT: retl # encoding: [0xc3] 967; 968; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: 969; X86-AVX: # %bb.0: 970; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 971; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 972; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 973; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 974; X86-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 975; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 976; X86-AVX-NEXT: retl # encoding: [0xc3] 977; 978; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE: 979; X64-SSE: # %bb.0: 980; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 981; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 982; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 983; X64-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 984; X64-SSE-NEXT: retq # encoding: [0xc3] 985; 986; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: 987; X64-AVX: # %bb.0: 988; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 989; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 990; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 991; X64-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 992; X64-AVX-NEXT: retq # encoding: [0xc3] 993 %1 = alloca i32, align 4 994 %2 = bitcast i32* %1 to i8* 995 call void @llvm.x86.sse.stmxcsr(i8* %2) 996 %3 = load i32, i32* %1, align 4 997 %4 = and i32 %3, 63 998 ret i32 %4 999} 1000 1001define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind { 1002; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1003; X86-SSE: # %bb.0: 1004; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1005; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1006; X86-SSE-NEXT: stmxcsr (%eax) # 
encoding: [0x0f,0xae,0x18] 1007; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1008; X86-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1009; X86-SSE-NEXT: # imm = 0x8000 1010; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1011; X86-SSE-NEXT: retl # encoding: [0xc3] 1012; 1013; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1014; X86-AVX: # %bb.0: 1015; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1016; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1017; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1018; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1019; X86-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1020; X86-AVX-NEXT: # imm = 0x8000 1021; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1022; X86-AVX-NEXT: retl # encoding: [0xc3] 1023; 1024; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1025; X64-SSE: # %bb.0: 1026; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1027; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1028; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1029; X64-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1030; X64-SSE-NEXT: # imm = 0x8000 1031; X64-SSE-NEXT: retq # encoding: [0xc3] 1032; 1033; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1034; X64-AVX: # %bb.0: 1035; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1036; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1037; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1038; X64-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1039; X64-AVX-NEXT: # imm = 0x8000 1040; X64-AVX-NEXT: retq # encoding: [0xc3] 1041 %1 = alloca i32, align 4 1042 %2 = bitcast i32* %1 to i8* 1043 call void @llvm.x86.sse.stmxcsr(i8* %2) 1044 %3 = load i32, i32* %1, align 4 1045 %4 = and i32 %3, 32768 1046 ret i32 %4 1047} 1048 1049define i32 
@test_MM_GET_ROUNDING_MODE() nounwind { 1050; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE: 1051; X86-SSE: # %bb.0: 1052; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1053; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1054; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1055; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1056; X86-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1057; X86-SSE-NEXT: # imm = 0x6000 1058; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1059; X86-SSE-NEXT: retl # encoding: [0xc3] 1060; 1061; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE: 1062; X86-AVX: # %bb.0: 1063; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1064; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1065; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1066; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1067; X86-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1068; X86-AVX-NEXT: # imm = 0x6000 1069; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1070; X86-AVX-NEXT: retl # encoding: [0xc3] 1071; 1072; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE: 1073; X64-SSE: # %bb.0: 1074; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1075; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1076; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1077; X64-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1078; X64-SSE-NEXT: # imm = 0x6000 1079; X64-SSE-NEXT: retq # encoding: [0xc3] 1080; 1081; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE: 1082; X64-AVX: # %bb.0: 1083; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1084; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1085; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1086; X64-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1087; X64-AVX-NEXT: # imm = 0x6000 1088; 
X64-AVX-NEXT: retq # encoding: [0xc3] 1089 %1 = alloca i32, align 4 1090 %2 = bitcast i32* %1 to i8* 1091 call void @llvm.x86.sse.stmxcsr(i8* %2) 1092 %3 = load i32, i32* %1, align 4 1093 %4 = and i32 %3, 24576 1094 ret i32 %4 1095} 1096 1097define i32 @test_mm_getcsr() nounwind { 1098; X86-SSE-LABEL: test_mm_getcsr: 1099; X86-SSE: # %bb.0: 1100; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1101; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1102; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1103; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1104; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1105; X86-SSE-NEXT: retl # encoding: [0xc3] 1106; 1107; X86-AVX-LABEL: test_mm_getcsr: 1108; X86-AVX: # %bb.0: 1109; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1110; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1111; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1112; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1113; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1114; X86-AVX-NEXT: retl # encoding: [0xc3] 1115; 1116; X64-SSE-LABEL: test_mm_getcsr: 1117; X64-SSE: # %bb.0: 1118; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1119; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1120; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1121; X64-SSE-NEXT: retq # encoding: [0xc3] 1122; 1123; X64-AVX-LABEL: test_mm_getcsr: 1124; X64-AVX: # %bb.0: 1125; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1126; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1127; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1128; X64-AVX-NEXT: retq # encoding: [0xc3] 1129 %1 = alloca i32, align 4 1130 %2 = bitcast i32* %1 to i8* 1131 call void @llvm.x86.sse.stmxcsr(i8* %2) 1132 %3 = load i32, i32* %1, align 4 1133 ret i32 %3 1134} 1135 1136define <4 x float> 
@test_mm_load_ps(float* %a0) nounwind { 1137; X86-SSE-LABEL: test_mm_load_ps: 1138; X86-SSE: # %bb.0: 1139; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1140; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1141; X86-SSE-NEXT: retl # encoding: [0xc3] 1142; 1143; X86-AVX1-LABEL: test_mm_load_ps: 1144; X86-AVX1: # %bb.0: 1145; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1146; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] 1147; X86-AVX1-NEXT: retl # encoding: [0xc3] 1148; 1149; X86-AVX512-LABEL: test_mm_load_ps: 1150; X86-AVX512: # %bb.0: 1151; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1152; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] 1153; X86-AVX512-NEXT: retl # encoding: [0xc3] 1154; 1155; X64-SSE-LABEL: test_mm_load_ps: 1156; X64-SSE: # %bb.0: 1157; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1158; X64-SSE-NEXT: retq # encoding: [0xc3] 1159; 1160; X64-AVX1-LABEL: test_mm_load_ps: 1161; X64-AVX1: # %bb.0: 1162; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 1163; X64-AVX1-NEXT: retq # encoding: [0xc3] 1164; 1165; X64-AVX512-LABEL: test_mm_load_ps: 1166; X64-AVX512: # %bb.0: 1167; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 1168; X64-AVX512-NEXT: retq # encoding: [0xc3] 1169 %arg0 = bitcast float* %a0 to <4 x float>* 1170 %res = load <4 x float>, <4 x float>* %arg0, align 16 1171 ret <4 x float> %res 1172} 1173 1174define <4 x float> @test_mm_load_ps1(float* %a0) nounwind { 1175; X86-SSE-LABEL: test_mm_load_ps1: 1176; X86-SSE: # %bb.0: 1177; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1178; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1179; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1180; X86-SSE-NEXT: shufps $0, %xmm0, 
%xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1181; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1182; X86-SSE-NEXT: retl # encoding: [0xc3] 1183; 1184; X86-AVX1-LABEL: test_mm_load_ps1: 1185; X86-AVX1: # %bb.0: 1186; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1187; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] 1188; X86-AVX1-NEXT: retl # encoding: [0xc3] 1189; 1190; X86-AVX512-LABEL: test_mm_load_ps1: 1191; X86-AVX512: # %bb.0: 1192; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1193; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] 1194; X86-AVX512-NEXT: retl # encoding: [0xc3] 1195; 1196; X64-SSE-LABEL: test_mm_load_ps1: 1197; X64-SSE: # %bb.0: 1198; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1199; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1200; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1201; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1202; X64-SSE-NEXT: retq # encoding: [0xc3] 1203; 1204; X64-AVX1-LABEL: test_mm_load_ps1: 1205; X64-AVX1: # %bb.0: 1206; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] 1207; X64-AVX1-NEXT: retq # encoding: [0xc3] 1208; 1209; X64-AVX512-LABEL: test_mm_load_ps1: 1210; X64-AVX512: # %bb.0: 1211; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] 1212; X64-AVX512-NEXT: retq # encoding: [0xc3] 1213 %ld = load float, float* %a0, align 4 1214 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1215 %res1 = insertelement <4 x float> %res0, float %ld, i32 1 1216 %res2 = insertelement <4 x float> %res1, float %ld, i32 2 1217 %res3 = insertelement <4 x float> %res2, float %ld, i32 3 1218 ret <4 x float> %res3 1219} 1220 1221define <4 x float> @test_mm_load_ss(float* %a0) nounwind { 1222; X86-SSE-LABEL: test_mm_load_ss: 1223; X86-SSE: # %bb.0: 1224; 
X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1225; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1226; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1227; X86-SSE-NEXT: retl # encoding: [0xc3] 1228; 1229; X86-AVX1-LABEL: test_mm_load_ss: 1230; X86-AVX1: # %bb.0: 1231; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1232; X86-AVX1-NEXT: vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00] 1233; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1234; X86-AVX1-NEXT: retl # encoding: [0xc3] 1235; 1236; X86-AVX512-LABEL: test_mm_load_ss: 1237; X86-AVX512: # %bb.0: 1238; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1239; X86-AVX512-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00] 1240; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1241; X86-AVX512-NEXT: retl # encoding: [0xc3] 1242; 1243; X64-SSE-LABEL: test_mm_load_ss: 1244; X64-SSE: # %bb.0: 1245; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1246; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1247; X64-SSE-NEXT: retq # encoding: [0xc3] 1248; 1249; X64-AVX1-LABEL: test_mm_load_ss: 1250; X64-AVX1: # %bb.0: 1251; X64-AVX1-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07] 1252; X64-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1253; X64-AVX1-NEXT: retq # encoding: [0xc3] 1254; 1255; X64-AVX512-LABEL: test_mm_load_ss: 1256; X64-AVX512: # %bb.0: 1257; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] 1258; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1259; X64-AVX512-NEXT: retq # encoding: [0xc3] 1260 %ld = load float, float* %a0, align 1 1261 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1262 %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 1263 %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 1264 %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 1265 ret <4 x 
float> %res3 1266} 1267 1268define <4 x float> @test_mm_load1_ps(float* %a0) nounwind { 1269; X86-SSE-LABEL: test_mm_load1_ps: 1270; X86-SSE: # %bb.0: 1271; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1272; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1273; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1274; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1275; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1276; X86-SSE-NEXT: retl # encoding: [0xc3] 1277; 1278; X86-AVX1-LABEL: test_mm_load1_ps: 1279; X86-AVX1: # %bb.0: 1280; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1281; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] 1282; X86-AVX1-NEXT: retl # encoding: [0xc3] 1283; 1284; X86-AVX512-LABEL: test_mm_load1_ps: 1285; X86-AVX512: # %bb.0: 1286; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1287; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] 1288; X86-AVX512-NEXT: retl # encoding: [0xc3] 1289; 1290; X64-SSE-LABEL: test_mm_load1_ps: 1291; X64-SSE: # %bb.0: 1292; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1293; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1294; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1295; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1296; X64-SSE-NEXT: retq # encoding: [0xc3] 1297; 1298; X64-AVX1-LABEL: test_mm_load1_ps: 1299; X64-AVX1: # %bb.0: 1300; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] 1301; X64-AVX1-NEXT: retq # encoding: [0xc3] 1302; 1303; X64-AVX512-LABEL: test_mm_load1_ps: 1304; X64-AVX512: # %bb.0: 1305; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] 1306; X64-AVX512-NEXT: retq # encoding: [0xc3] 1307 %ld = load float, float* %a0, align 4 1308 %res0 = insertelement <4 
x float> undef, float %ld, i32 0 1309 %res1 = insertelement <4 x float> %res0, float %ld, i32 1 1310 %res2 = insertelement <4 x float> %res1, float %ld, i32 2 1311 %res3 = insertelement <4 x float> %res2, float %ld, i32 3 1312 ret <4 x float> %res3 1313} 1314 1315define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { 1316; X86-SSE-LABEL: test_mm_loadh_pi: 1317; X86-SSE: # %bb.0: 1318; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1319; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] 1320; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1321; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] 1322; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1323; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1324; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1325; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1326; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1327; X86-SSE-NEXT: retl # encoding: [0xc3] 1328; 1329; X86-AVX1-LABEL: test_mm_loadh_pi: 1330; X86-AVX1: # %bb.0: 1331; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1332; X86-AVX1-NEXT: vmovsd (%eax), %xmm1 # encoding: [0xc5,0xfb,0x10,0x08] 1333; X86-AVX1-NEXT: # xmm1 = mem[0],zero 1334; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 1335; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 1336; X86-AVX1-NEXT: retl # encoding: [0xc3] 1337; 1338; X86-AVX512-LABEL: test_mm_loadh_pi: 1339; X86-AVX512: # %bb.0: 1340; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1341; X86-AVX512-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] 1342; X86-AVX512-NEXT: # xmm1 = mem[0],zero 1343; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 1344; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 1345; X86-AVX512-NEXT: retl # encoding: [0xc3] 1346; 1347; 
X64-SSE-LABEL: test_mm_loadh_pi: 1348; X64-SSE: # %bb.0: 1349; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] 1350; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] 1351; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] 1352; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] 1353; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] 1354; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1355; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] 1356; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1357; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1358; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1359; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1360; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1361; X64-SSE-NEXT: retq # encoding: [0xc3] 1362; 1363; X64-AVX1-LABEL: test_mm_loadh_pi: 1364; X64-AVX1: # %bb.0: 1365; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07] 1366; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] 1367; X64-AVX1-NEXT: retq # encoding: [0xc3] 1368; 1369; X64-AVX512-LABEL: test_mm_loadh_pi: 1370; X64-AVX512: # %bb.0: 1371; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07] 1372; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] 1373; X64-AVX512-NEXT: retq # encoding: [0xc3] 1374 %ptr = bitcast x86_mmx* %a1 to <2 x float>* 1375 %ld = load <2 x float>, <2 x float>* %ptr 1376 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1377 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1378 ret <4 x float> %res 1379} 1380 1381define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { 1382; X86-SSE-LABEL: test_mm_loadl_pi: 1383; X86-SSE: # %bb.0: 1384; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # 
encoding: [0x8b,0x44,0x24,0x04] 1385; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] 1386; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1387; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] 1388; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1389; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1390; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1391; X86-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] 1392; X86-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3] 1393; X86-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 1394; X86-SSE-NEXT: retl # encoding: [0xc3] 1395; 1396; X86-AVX1-LABEL: test_mm_loadl_pi: 1397; X86-AVX1: # %bb.0: 1398; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1399; X86-AVX1-NEXT: vmovsd (%eax), %xmm1 # encoding: [0xc5,0xfb,0x10,0x08] 1400; X86-AVX1-NEXT: # xmm1 = mem[0],zero 1401; X86-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] 1402; X86-AVX1-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3] 1403; X86-AVX1-NEXT: retl # encoding: [0xc3] 1404; 1405; X86-AVX512-LABEL: test_mm_loadl_pi: 1406; X86-AVX512: # %bb.0: 1407; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1408; X86-AVX512-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08] 1409; X86-AVX512-NEXT: # xmm1 = mem[0],zero 1410; X86-AVX512-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] 1411; X86-AVX512-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3] 1412; X86-AVX512-NEXT: retl # encoding: [0xc3] 1413; 1414; X64-SSE-LABEL: test_mm_loadl_pi: 1415; X64-SSE: # %bb.0: 1416; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] 1417; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] 1418; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] 1419; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: 
[0x89,0x44,0x24,0xfc] 1420; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] 1421; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1422; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] 1423; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1424; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1425; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1426; X64-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] 1427; X64-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3] 1428; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 1429; X64-SSE-NEXT: retq # encoding: [0xc3] 1430; 1431; X64-AVX1-LABEL: test_mm_loadl_pi: 1432; X64-AVX1: # %bb.0: 1433; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07] 1434; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] 1435; X64-AVX1-NEXT: retq # encoding: [0xc3] 1436; 1437; X64-AVX512-LABEL: test_mm_loadl_pi: 1438; X64-AVX512: # %bb.0: 1439; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07] 1440; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] 1441; X64-AVX512-NEXT: retq # encoding: [0xc3] 1442 %ptr = bitcast x86_mmx* %a1 to <2 x float>* 1443 %ld = load <2 x float>, <2 x float>* %ptr 1444 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1445 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1446 ret <4 x float> %res 1447} 1448 1449define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind { 1450; X86-SSE-LABEL: test_mm_loadr_ps: 1451; X86-SSE: # %bb.0: 1452; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1453; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1454; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1455; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1456; X86-SSE-NEXT: retl # encoding: [0xc3] 
1457; 1458; X86-AVX1-LABEL: test_mm_loadr_ps: 1459; X86-AVX1: # %bb.0: 1460; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1461; X86-AVX1-NEXT: vpermilps $27, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1462; X86-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1463; X86-AVX1-NEXT: retl # encoding: [0xc3] 1464; 1465; X86-AVX512-LABEL: test_mm_loadr_ps: 1466; X86-AVX512: # %bb.0: 1467; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1468; X86-AVX512-NEXT: vpermilps $27, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1469; X86-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1470; X86-AVX512-NEXT: retl # encoding: [0xc3] 1471; 1472; X64-SSE-LABEL: test_mm_loadr_ps: 1473; X64-SSE: # %bb.0: 1474; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1475; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1476; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1477; X64-SSE-NEXT: retq # encoding: [0xc3] 1478; 1479; X64-AVX1-LABEL: test_mm_loadr_ps: 1480; X64-AVX1: # %bb.0: 1481; X64-AVX1-NEXT: vpermilps $27, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1482; X64-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1483; X64-AVX1-NEXT: retq # encoding: [0xc3] 1484; 1485; X64-AVX512-LABEL: test_mm_loadr_ps: 1486; X64-AVX512: # %bb.0: 1487; X64-AVX512-NEXT: vpermilps $27, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1488; X64-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1489; X64-AVX512-NEXT: retq # encoding: [0xc3] 1490 %arg0 = bitcast float* %a0 to <4 x float>* 1491 %ld = load <4 x float>, <4 x float>* %arg0, align 16 1492 %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1493 ret <4 x float> %res 1494} 1495 1496define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind { 1497; X86-SSE-LABEL: test_mm_loadu_ps: 1498; X86-SSE: # %bb.0: 1499; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] 1500; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] 1501; X86-SSE-NEXT: retl # encoding: [0xc3] 1502; 1503; X86-AVX1-LABEL: test_mm_loadu_ps: 1504; X86-AVX1: # %bb.0: 1505; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1506; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] 1507; X86-AVX1-NEXT: retl # encoding: [0xc3] 1508; 1509; X86-AVX512-LABEL: test_mm_loadu_ps: 1510; X86-AVX512: # %bb.0: 1511; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1512; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] 1513; X86-AVX512-NEXT: retl # encoding: [0xc3] 1514; 1515; X64-SSE-LABEL: test_mm_loadu_ps: 1516; X64-SSE: # %bb.0: 1517; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] 1518; X64-SSE-NEXT: retq # encoding: [0xc3] 1519; 1520; X64-AVX1-LABEL: test_mm_loadu_ps: 1521; X64-AVX1: # %bb.0: 1522; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] 1523; X64-AVX1-NEXT: retq # encoding: [0xc3] 1524; 1525; X64-AVX512-LABEL: test_mm_loadu_ps: 1526; X64-AVX512: # %bb.0: 1527; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] 1528; X64-AVX512-NEXT: retq # encoding: [0xc3] 1529 %arg0 = bitcast float* %a0 to <4 x float>* 1530 %res = load <4 x float>, <4 x float>* %arg0, align 1 1531 ret <4 x float> %res 1532} 1533 1534define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) { 1535; SSE-LABEL: test_mm_max_ps: 1536; SSE: # %bb.0: 1537; SSE-NEXT: maxps %xmm1, %xmm0 # encoding: [0x0f,0x5f,0xc1] 1538; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1539; 1540; AVX1-LABEL: test_mm_max_ps: 1541; AVX1: # %bb.0: 1542; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1] 1543; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1544; 1545; AVX512-LABEL: test_mm_max_ps: 1546; AVX512: # %bb.0: 1547; AVX512-NEXT: vmaxps %xmm1, %xmm0, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] 1548; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1549 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 1550 ret <4 x float> %res 1551} 1552declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1553 1554define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) { 1555; SSE-LABEL: test_mm_max_ss: 1556; SSE: # %bb.0: 1557; SSE-NEXT: maxss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5f,0xc1] 1558; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1559; 1560; AVX1-LABEL: test_mm_max_ss: 1561; AVX1: # %bb.0: 1562; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5f,0xc1] 1563; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1564; 1565; AVX512-LABEL: test_mm_max_ss: 1566; AVX512: # %bb.0: 1567; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1] 1568; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1569 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 1570 ret <4 x float> %res 1571} 1572declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1573 1574define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) { 1575; SSE-LABEL: test_mm_min_ps: 1576; SSE: # %bb.0: 1577; SSE-NEXT: minps %xmm1, %xmm0 # encoding: [0x0f,0x5d,0xc1] 1578; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1579; 1580; AVX1-LABEL: test_mm_min_ps: 1581; AVX1: # %bb.0: 1582; AVX1-NEXT: vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1] 1583; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1584; 1585; AVX512-LABEL: test_mm_min_ps: 1586; AVX512: # %bb.0: 1587; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] 1588; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1589 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1590 ret <4 x float> %res 1591} 1592declare <4 x float> @llvm.x86.sse.min.ps(<4 
x float>, <4 x float>) nounwind readnone 1593 1594define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) { 1595; SSE-LABEL: test_mm_min_ss: 1596; SSE: # %bb.0: 1597; SSE-NEXT: minss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5d,0xc1] 1598; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1599; 1600; AVX1-LABEL: test_mm_min_ss: 1601; AVX1: # %bb.0: 1602; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5d,0xc1] 1603; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1604; 1605; AVX512-LABEL: test_mm_min_ss: 1606; AVX512: # %bb.0: 1607; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1] 1608; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1609 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 1610 ret <4 x float> %res 1611} 1612declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1613 1614define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) { 1615; SSE-LABEL: test_mm_move_ss: 1616; SSE: # %bb.0: 1617; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 1618; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1619; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1620; 1621; AVX-LABEL: test_mm_move_ss: 1622; AVX: # %bb.0: 1623; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 1624; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1625; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1626 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1627 ret <4 x float> %res 1628} 1629 1630define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) { 1631; SSE-LABEL: test_mm_movehl_ps: 1632; SSE: # %bb.0: 1633; SSE-NEXT: movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1] 1634; SSE-NEXT: # xmm0 = xmm1[1],xmm0[1] 1635; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1636; 1637; AVX1-LABEL: test_mm_movehl_ps: 1638; AVX1: # %bb.0: 1639; AVX1-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # encoding: 
[0xc5,0xf1,0x15,0xc0] 1640; AVX1-NEXT: # xmm0 = xmm1[1],xmm0[1] 1641; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1642; 1643; AVX512-LABEL: test_mm_movehl_ps: 1644; AVX512: # %bb.0: 1645; AVX512-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x15,0xc0] 1646; AVX512-NEXT: # xmm0 = xmm1[1],xmm0[1] 1647; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1648 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 1649 ret <4 x float> %res 1650} 1651 1652define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) { 1653; SSE-LABEL: test_mm_movelh_ps: 1654; SSE: # %bb.0: 1655; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1656; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1657; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1658; 1659; AVX1-LABEL: test_mm_movelh_ps: 1660; AVX1: # %bb.0: 1661; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 1662; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 1663; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1664; 1665; AVX512-LABEL: test_mm_movelh_ps: 1666; AVX512: # %bb.0: 1667; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 1668; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 1669; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1670 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1671 ret <4 x float> %res 1672} 1673 1674define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind { 1675; SSE-LABEL: test_mm_movemask_ps: 1676; SSE: # %bb.0: 1677; SSE-NEXT: movmskps %xmm0, %eax # encoding: [0x0f,0x50,0xc0] 1678; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1679; 1680; AVX-LABEL: test_mm_movemask_ps: 1681; AVX: # %bb.0: 1682; AVX-NEXT: vmovmskps %xmm0, %eax # encoding: [0xc5,0xf8,0x50,0xc0] 1683; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1684 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) 1685 ret i32 %res 1686} 1687declare i32 @llvm.x86.sse.movmsk.ps(<4 x 
float>) nounwind readnone 1688 1689define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1690; SSE-LABEL: test_mm_mul_ps: 1691; SSE: # %bb.0: 1692; SSE-NEXT: mulps %xmm1, %xmm0 # encoding: [0x0f,0x59,0xc1] 1693; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1694; 1695; AVX1-LABEL: test_mm_mul_ps: 1696; AVX1: # %bb.0: 1697; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x59,0xc1] 1698; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1699; 1700; AVX512-LABEL: test_mm_mul_ps: 1701; AVX512: # %bb.0: 1702; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1] 1703; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1704 %res = fmul <4 x float> %a0, %a1 1705 ret <4 x float> %res 1706} 1707 1708define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 1709; SSE-LABEL: test_mm_mul_ss: 1710; SSE: # %bb.0: 1711; SSE-NEXT: mulss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x59,0xc1] 1712; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1713; 1714; AVX1-LABEL: test_mm_mul_ss: 1715; AVX1: # %bb.0: 1716; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x59,0xc1] 1717; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1718; 1719; AVX512-LABEL: test_mm_mul_ss: 1720; AVX512: # %bb.0: 1721; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1] 1722; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1723 %ext0 = extractelement <4 x float> %a0, i32 0 1724 %ext1 = extractelement <4 x float> %a1, i32 0 1725 %fmul = fmul float %ext0, %ext1 1726 %res = insertelement <4 x float> %a0, float %fmul, i32 0 1727 ret <4 x float> %res 1728} 1729 1730define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1731; SSE-LABEL: test_mm_or_ps: 1732; SSE: # %bb.0: 1733; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] 1734; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1735; 1736; AVX1-LABEL: test_mm_or_ps: 1737; AVX1: # %bb.0: 1738; AVX1-NEXT: vorps %xmm1, 
%xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] 1739; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1740; 1741; AVX512-LABEL: test_mm_or_ps: 1742; AVX512: # %bb.0: 1743; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 1744; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1745 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 1746 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 1747 %res = or <4 x i32> %arg0, %arg1 1748 %bc = bitcast <4 x i32> %res to <4 x float> 1749 ret <4 x float> %bc 1750} 1751 1752define void @test_mm_prefetch(i8* %a0) { 1753; X86-LABEL: test_mm_prefetch: 1754; X86: # %bb.0: 1755; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1756; X86-NEXT: prefetchnta (%eax) # encoding: [0x0f,0x18,0x00] 1757; X86-NEXT: retl # encoding: [0xc3] 1758; 1759; X64-LABEL: test_mm_prefetch: 1760; X64: # %bb.0: 1761; X64-NEXT: prefetchnta (%rdi) # encoding: [0x0f,0x18,0x07] 1762; X64-NEXT: retq # encoding: [0xc3] 1763 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1) 1764 ret void 1765} 1766declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone 1767 1768define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) { 1769; SSE-LABEL: test_mm_rcp_ps: 1770; SSE: # %bb.0: 1771; SSE-NEXT: rcpps %xmm0, %xmm0 # encoding: [0x0f,0x53,0xc0] 1772; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1773; 1774; AVX-LABEL: test_mm_rcp_ps: 1775; AVX: # %bb.0: 1776; AVX-NEXT: vrcpps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x53,0xc0] 1777; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1778 %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 1779 ret <4 x float> %res 1780} 1781declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1782 1783define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) { 1784; SSE-LABEL: test_mm_rcp_ss: 1785; SSE: # %bb.0: 1786; SSE-NEXT: rcpss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x53,0xc0] 1787; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1788; 1789; AVX-LABEL: test_mm_rcp_ss: 
1790; AVX: # %bb.0: 1791; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x53,0xc0] 1792; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1793 %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) 1794 ret <4 x float> %rcp 1795} 1796declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 1797 1798define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) { 1799; SSE-LABEL: test_mm_rsqrt_ps: 1800; SSE: # %bb.0: 1801; SSE-NEXT: rsqrtps %xmm0, %xmm0 # encoding: [0x0f,0x52,0xc0] 1802; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1803; 1804; AVX-LABEL: test_mm_rsqrt_ps: 1805; AVX: # %bb.0: 1806; AVX-NEXT: vrsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x52,0xc0] 1807; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1808 %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 1809 ret <4 x float> %res 1810} 1811declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 1812 1813define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) { 1814; SSE-LABEL: test_mm_rsqrt_ss: 1815; SSE: # %bb.0: 1816; SSE-NEXT: rsqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x52,0xc0] 1817; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1818; 1819; AVX-LABEL: test_mm_rsqrt_ss: 1820; AVX: # %bb.0: 1821; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x52,0xc0] 1822; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1823 %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) 1824 ret <4 x float> %rsqrt 1825} 1826declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone 1827 1828define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind { 1829; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: 1830; X86-SSE: # %bb.0: 1831; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1832; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1833; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1834; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1835; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1836; 
X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] 1837; X86-SSE-NEXT: # imm = 0xE07F 1838; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1839; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1840; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1841; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1842; X86-SSE-NEXT: retl # encoding: [0xc3] 1843; 1844; X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: 1845; X86-AVX: # %bb.0: 1846; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1847; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1848; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1849; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1850; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1851; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] 1852; X86-AVX-NEXT: # imm = 0xE07F 1853; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1854; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1855; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1856; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1857; X86-AVX-NEXT: retl # encoding: [0xc3] 1858; 1859; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: 1860; X64-SSE: # %bb.0: 1861; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1862; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1863; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1864; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] 1865; X64-SSE-NEXT: # imm = 0xE07F 1866; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1867; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1868; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1869; X64-SSE-NEXT: retq # encoding: [0xc3] 1870; 1871; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: 1872; X64-AVX: # %bb.0: 1873; X64-AVX-NEXT: leaq 
-{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1874; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1875; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1876; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] 1877; X64-AVX-NEXT: # imm = 0xE07F 1878; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1879; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1880; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1881; X64-AVX-NEXT: retq # encoding: [0xc3] 1882 %1 = alloca i32, align 4 1883 %2 = bitcast i32* %1 to i8* 1884 call void @llvm.x86.sse.stmxcsr(i8* %2) 1885 %3 = load i32, i32* %1 1886 %4 = and i32 %3, -8065 1887 %5 = or i32 %4, %a0 1888 store i32 %5, i32* %1 1889 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1890 ret void 1891} 1892declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone 1893 1894define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind { 1895; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: 1896; X86-SSE: # %bb.0: 1897; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1898; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1899; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1900; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1901; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1902; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0] 1903; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1904; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1905; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1906; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1907; X86-SSE-NEXT: retl # encoding: [0xc3] 1908; 1909; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: 1910; X86-AVX: # %bb.0: 1911; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1912; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1913; X86-AVX-NEXT: movl %esp, %ecx # 
encoding: [0x89,0xe1] 1914; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1915; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1916; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0] 1917; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1918; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1919; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1920; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1921; X86-AVX-NEXT: retl # encoding: [0xc3] 1922; 1923; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: 1924; X64-SSE: # %bb.0: 1925; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1926; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1927; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1928; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] 1929; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1930; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1931; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1932; X64-SSE-NEXT: retq # encoding: [0xc3] 1933; 1934; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: 1935; X64-AVX: # %bb.0: 1936; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1937; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1938; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1939; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] 1940; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1941; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1942; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1943; X64-AVX-NEXT: retq # encoding: [0xc3] 1944 %1 = alloca i32, align 4 1945 %2 = bitcast i32* %1 to i8* 1946 call void @llvm.x86.sse.stmxcsr(i8* %2) 1947 %3 = load i32, i32* %1 1948 %4 = and i32 %3, -64 1949 %5 = or i32 %4, %a0 1950 store i32 %5, i32* %1 
1951 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1952 ret void 1953} 1954 1955define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind { 1956; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1957; X86-SSE: # %bb.0: 1958; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1959; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1960; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1961; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1962; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1963; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] 1964; X86-SSE-NEXT: # imm = 0xFFFF7FFF 1965; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1966; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1967; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1968; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1969; X86-SSE-NEXT: retl # encoding: [0xc3] 1970; 1971; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1972; X86-AVX: # %bb.0: 1973; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1974; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1975; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1976; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1977; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1978; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] 1979; X86-AVX-NEXT: # imm = 0xFFFF7FFF 1980; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1981; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1982; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1983; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1984; X86-AVX-NEXT: retl # encoding: [0xc3] 1985; 1986; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1987; X64-SSE: # %bb.0: 1988; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1989; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1990; 
X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1991; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] 1992; X64-SSE-NEXT: # imm = 0xFFFF7FFF 1993; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1994; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1995; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1996; X64-SSE-NEXT: retq # encoding: [0xc3] 1997; 1998; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1999; X64-AVX: # %bb.0: 2000; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2001; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 2002; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2003; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] 2004; X64-AVX-NEXT: # imm = 0xFFFF7FFF 2005; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2006; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2007; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2008; X64-AVX-NEXT: retq # encoding: [0xc3] 2009 %1 = alloca i32, align 4 2010 %2 = bitcast i32* %1 to i8* 2011 call void @llvm.x86.sse.stmxcsr(i8* %2) 2012 %3 = load i32, i32* %1 2013 %4 = and i32 %3, -32769 2014 %5 = or i32 %4, %a0 2015 store i32 %5, i32* %1 2016 call void @llvm.x86.sse.ldmxcsr(i8* %2) 2017 ret void 2018} 2019 2020define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind { 2021; X86-SSE-LABEL: test_mm_set_ps: 2022; X86-SSE: # %bb.0: 2023; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 2024; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2025; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 2026; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2027; X86-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 2028; X86-SSE-NEXT: # xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2029; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08] 2030; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2031; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x04] 2032; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 2033; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 2034; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 2035; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 2036; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2037; X86-SSE-NEXT: retl # encoding: [0xc3] 2038; 2039; X86-AVX1-LABEL: test_mm_set_ps: 2040; X86-AVX1: # %bb.0: 2041; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2042; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2043; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2044; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2045; X86-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2046; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2047; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08] 2048; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2049; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20] 2050; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2051; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04] 2052; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2053; X86-AVX1-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30] 2054; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0] 2055; X86-AVX1-NEXT: retl # encoding: [0xc3] 2056; 2057; X86-AVX512-LABEL: test_mm_set_ps: 2058; X86-AVX512: # %bb.0: 2059; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2060; X86-AVX512-NEXT: 
# xmm0 = mem[0],zero,zero,zero 2061; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2062; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2063; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2064; X86-AVX512-NEXT: # xmm2 = mem[0],zero,zero,zero 2065; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2066; X86-AVX512-NEXT: # xmm3 = mem[0],zero,zero,zero 2067; X86-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2068; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2069; X86-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2070; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2071; X86-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2072; X86-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2073; X86-AVX512-NEXT: retl # encoding: [0xc3] 2074; 2075; X64-SSE-LABEL: test_mm_set_ps: 2076; X64-SSE: # %bb.0: 2077; X64-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 2078; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2079; X64-SSE-NEXT: unpcklps %xmm2, %xmm3 # encoding: [0x0f,0x14,0xda] 2080; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 2081; X64-SSE-NEXT: movlhps %xmm1, %xmm3 # encoding: [0x0f,0x16,0xd9] 2082; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm1[0] 2083; X64-SSE-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] 2084; X64-SSE-NEXT: retq # encoding: [0xc3] 2085; 2086; X64-AVX1-LABEL: test_mm_set_ps: 2087; X64-AVX1: # %bb.0: 2088; X64-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2089; X64-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2090; X64-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # 
encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2091; X64-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2092; X64-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2093; X64-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2094; X64-AVX1-NEXT: retq # encoding: [0xc3] 2095; 2096; X64-AVX512-LABEL: test_mm_set_ps: 2097; X64-AVX512: # %bb.0: 2098; X64-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2099; X64-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2100; X64-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2101; X64-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2102; X64-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2103; X64-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2104; X64-AVX512-NEXT: retq # encoding: [0xc3] 2105 %res0 = insertelement <4 x float> undef, float %a3, i32 0 2106 %res1 = insertelement <4 x float> %res0, float %a2, i32 1 2107 %res2 = insertelement <4 x float> %res1, float %a1, i32 2 2108 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2109 ret <4 x float> %res3 2110} 2111 2112define <4 x float> @test_mm_set_ps1(float %a0) nounwind { 2113; X86-SSE-LABEL: test_mm_set_ps1: 2114; X86-SSE: # %bb.0: 2115; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2116; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2117; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2118; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2119; X86-SSE-NEXT: retl # encoding: [0xc3] 2120; 2121; X86-AVX1-LABEL: test_mm_set_ps1: 2122; X86-AVX1: # %bb.0: 2123; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2124; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2125; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2126; 
X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2127; X86-AVX1-NEXT: retl # encoding: [0xc3] 2128; 2129; X86-AVX512-LABEL: test_mm_set_ps1: 2130; X86-AVX512: # %bb.0: 2131; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2132; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2133; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2134; X86-AVX512-NEXT: retl # encoding: [0xc3] 2135; 2136; X64-SSE-LABEL: test_mm_set_ps1: 2137; X64-SSE: # %bb.0: 2138; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2139; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2140; X64-SSE-NEXT: retq # encoding: [0xc3] 2141; 2142; X64-AVX1-LABEL: test_mm_set_ps1: 2143; X64-AVX1: # %bb.0: 2144; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2145; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2146; X64-AVX1-NEXT: retq # encoding: [0xc3] 2147; 2148; X64-AVX512-LABEL: test_mm_set_ps1: 2149; X64-AVX512: # %bb.0: 2150; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2151; X64-AVX512-NEXT: retq # encoding: [0xc3] 2152 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2153 %res1 = insertelement <4 x float> %res0, float %a0, i32 1 2154 %res2 = insertelement <4 x float> %res1, float %a0, i32 2 2155 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2156 ret <4 x float> %res3 2157} 2158 2159define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind { 2160; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE: 2161; X86-SSE: # %bb.0: 2162; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 2163; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2164; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 2165; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 2166; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 2167; X86-SSE-NEXT: andl $-24577, %edx # 
encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] 2168; X86-SSE-NEXT: # imm = 0x9FFF 2169; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 2170; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 2171; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 2172; X86-SSE-NEXT: popl %eax # encoding: [0x58] 2173; X86-SSE-NEXT: retl # encoding: [0xc3] 2174; 2175; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE: 2176; X86-AVX: # %bb.0: 2177; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 2178; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2179; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 2180; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 2181; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 2182; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] 2183; X86-AVX-NEXT: # imm = 0x9FFF 2184; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 2185; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 2186; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 2187; X86-AVX-NEXT: popl %eax # encoding: [0x58] 2188; X86-AVX-NEXT: retl # encoding: [0xc3] 2189; 2190; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE: 2191; X64-SSE: # %bb.0: 2192; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2193; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 2194; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2195; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] 2196; X64-SSE-NEXT: # imm = 0x9FFF 2197; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2198; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2199; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 2200; X64-SSE-NEXT: retq # encoding: [0xc3] 2201; 2202; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE: 2203; X64-AVX: # %bb.0: 2204; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: 
[0x48,0x8d,0x44,0x24,0xfc] 2205; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 2206; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2207; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] 2208; X64-AVX-NEXT: # imm = 0x9FFF 2209; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2210; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2211; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2212; X64-AVX-NEXT: retq # encoding: [0xc3] 2213 %1 = alloca i32, align 4 2214 %2 = bitcast i32* %1 to i8* 2215 call void @llvm.x86.sse.stmxcsr(i8* %2) 2216 %3 = load i32, i32* %1 2217 %4 = and i32 %3, -24577 2218 %5 = or i32 %4, %a0 2219 store i32 %5, i32* %1 2220 call void @llvm.x86.sse.ldmxcsr(i8* %2) 2221 ret void 2222} 2223 2224define <4 x float> @test_mm_set_ss(float %a0) nounwind { 2225; X86-SSE-LABEL: test_mm_set_ss: 2226; X86-SSE: # %bb.0: 2227; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] 2228; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2229; X86-SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 2230; X86-SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 2231; X86-SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 2232; X86-SSE-NEXT: retl # encoding: [0xc3] 2233; 2234; X86-AVX1-LABEL: test_mm_set_ss: 2235; X86-AVX1: # %bb.0: 2236; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2237; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2238; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2239; X86-AVX1-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2240; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2241; X86-AVX1-NEXT: retl # encoding: [0xc3] 2242; 2243; X86-AVX512-LABEL: test_mm_set_ss: 2244; X86-AVX512: # %bb.0: 2245; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfa,0x10,0x44,0x24,0x04] 2246; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2247; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2248; X86-AVX512-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2249; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2250; X86-AVX512-NEXT: retl # encoding: [0xc3] 2251; 2252; X64-SSE-LABEL: test_mm_set_ss: 2253; X64-SSE: # %bb.0: 2254; X64-SSE-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] 2255; X64-SSE-NEXT: movss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x10,0xc8] 2256; X64-SSE-NEXT: # xmm1 = xmm0[0],xmm1[1,2,3] 2257; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 2258; X64-SSE-NEXT: retq # encoding: [0xc3] 2259; 2260; X64-AVX-LABEL: test_mm_set_ss: 2261; X64-AVX: # %bb.0: 2262; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2263; X64-AVX-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2264; X64-AVX-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2265; X64-AVX-NEXT: retq # encoding: [0xc3] 2266 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2267 %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 2268 %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 2269 %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 2270 ret <4 x float> %res3 2271} 2272 2273define <4 x float> @test_mm_set1_ps(float %a0) nounwind { 2274; X86-SSE-LABEL: test_mm_set1_ps: 2275; X86-SSE: # %bb.0: 2276; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2277; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2278; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2279; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2280; X86-SSE-NEXT: retl # encoding: [0xc3] 2281; 2282; X86-AVX1-LABEL: test_mm_set1_ps: 2283; X86-AVX1: # %bb.0: 2284; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2285; X86-AVX1-NEXT: # xmm0 = 
mem[0],zero,zero,zero 2286; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2287; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2288; X86-AVX1-NEXT: retl # encoding: [0xc3] 2289; 2290; X86-AVX512-LABEL: test_mm_set1_ps: 2291; X86-AVX512: # %bb.0: 2292; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2293; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2294; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2295; X86-AVX512-NEXT: retl # encoding: [0xc3] 2296; 2297; X64-SSE-LABEL: test_mm_set1_ps: 2298; X64-SSE: # %bb.0: 2299; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2300; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2301; X64-SSE-NEXT: retq # encoding: [0xc3] 2302; 2303; X64-AVX1-LABEL: test_mm_set1_ps: 2304; X64-AVX1: # %bb.0: 2305; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2306; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2307; X64-AVX1-NEXT: retq # encoding: [0xc3] 2308; 2309; X64-AVX512-LABEL: test_mm_set1_ps: 2310; X64-AVX512: # %bb.0: 2311; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2312; X64-AVX512-NEXT: retq # encoding: [0xc3] 2313 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2314 %res1 = insertelement <4 x float> %res0, float %a0, i32 1 2315 %res2 = insertelement <4 x float> %res1, float %a0, i32 2 2316 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2317 ret <4 x float> %res3 2318} 2319 2320define void @test_mm_setcsr(i32 %a0) nounwind { 2321; X86-SSE-LABEL: test_mm_setcsr: 2322; X86-SSE: # %bb.0: 2323; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] 2324; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10] 2325; X86-SSE-NEXT: retl # encoding: [0xc3] 2326; 2327; X86-AVX-LABEL: test_mm_setcsr: 2328; X86-AVX: # %bb.0: 2329; 
X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] 2330; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10] 2331; X86-AVX-NEXT: retl # encoding: [0xc3] 2332; 2333; X64-SSE-LABEL: test_mm_setcsr: 2334; X64-SSE: # %bb.0: 2335; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] 2336; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2337; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 2338; X64-SSE-NEXT: retq # encoding: [0xc3] 2339; 2340; X64-AVX-LABEL: test_mm_setcsr: 2341; X64-AVX: # %bb.0: 2342; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] 2343; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2344; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2345; X64-AVX-NEXT: retq # encoding: [0xc3] 2346 %st = alloca i32, align 4 2347 store i32 %a0, i32* %st, align 4 2348 %bc = bitcast i32* %st to i8* 2349 call void @llvm.x86.sse.ldmxcsr(i8* %bc) 2350 ret void 2351} 2352 2353define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind { 2354; X86-SSE-LABEL: test_mm_setr_ps: 2355; X86-SSE: # %bb.0: 2356; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 2357; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2358; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 2359; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2360; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 2361; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2362; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] 2363; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 2364; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2365; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2366; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: 
[0x0f,0x14,0xc2] 2367; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2368; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 2369; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2370; X86-SSE-NEXT: retl # encoding: [0xc3] 2371; 2372; X86-AVX1-LABEL: test_mm_setr_ps: 2373; X86-AVX1: # %bb.0: 2374; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2375; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2376; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2377; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2378; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2379; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero 2380; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2381; X86-AVX1-NEXT: # xmm3 = mem[0],zero,zero,zero 2382; X86-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2383; X86-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2384; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2385; X86-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2386; X86-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2387; X86-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2388; X86-AVX1-NEXT: retl # encoding: [0xc3] 2389; 2390; X86-AVX512-LABEL: test_mm_setr_ps: 2391; X86-AVX512: # %bb.0: 2392; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2393; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2394; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2395; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2396; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2397; X86-AVX512-NEXT: # xmm2 = 
mem[0],zero,zero,zero 2398; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2399; X86-AVX512-NEXT: # xmm3 = mem[0],zero,zero,zero 2400; X86-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2401; X86-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2402; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2403; X86-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2404; X86-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2405; X86-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2406; X86-AVX512-NEXT: retl # encoding: [0xc3] 2407; 2408; X64-SSE-LABEL: test_mm_setr_ps: 2409; X64-SSE: # %bb.0: 2410; X64-SSE-NEXT: unpcklps %xmm3, %xmm2 # encoding: [0x0f,0x14,0xd3] 2411; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2412; X64-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 2413; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2414; X64-SSE-NEXT: movlhps %xmm2, %xmm0 # encoding: [0x0f,0x16,0xc2] 2415; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 2416; X64-SSE-NEXT: retq # encoding: [0xc3] 2417; 2418; X64-AVX1-LABEL: test_mm_setr_ps: 2419; X64-AVX1: # %bb.0: 2420; X64-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2421; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2422; X64-AVX1-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2423; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2424; X64-AVX1-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2425; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2426; X64-AVX1-NEXT: retq # encoding: [0xc3] 2427; 2428; X64-AVX512-LABEL: test_mm_setr_ps: 2429; X64-AVX512: # %bb.0: 2430; X64-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX 
TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2431; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2432; X64-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2433; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2434; X64-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2435; X64-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2436; X64-AVX512-NEXT: retq # encoding: [0xc3] 2437 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2438 %res1 = insertelement <4 x float> %res0, float %a1, i32 1 2439 %res2 = insertelement <4 x float> %res1, float %a2, i32 2 2440 %res3 = insertelement <4 x float> %res2, float %a3, i32 3 2441 ret <4 x float> %res3 2442} 2443 2444define <4 x float> @test_mm_setzero_ps() { 2445; SSE-LABEL: test_mm_setzero_ps: 2446; SSE: # %bb.0: 2447; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 2448; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2449; 2450; AVX1-LABEL: test_mm_setzero_ps: 2451; AVX1: # %bb.0: 2452; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 2453; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2454; 2455; AVX512-LABEL: test_mm_setzero_ps: 2456; AVX512: # %bb.0: 2457; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 2458; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2459 ret <4 x float> zeroinitializer 2460} 2461 2462define void @test_mm_sfence() nounwind { 2463; CHECK-LABEL: test_mm_sfence: 2464; CHECK: # %bb.0: 2465; CHECK-NEXT: sfence # encoding: [0x0f,0xae,0xf8] 2466; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2467 call void @llvm.x86.sse.sfence() 2468 ret void 2469} 2470declare void @llvm.x86.sse.sfence() nounwind readnone 2471 2472define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 2473; SSE-LABEL: test_mm_shuffle_ps: 2474; SSE: # %bb.0: 2475; SSE-NEXT: shufps $0, 
%xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x00] 2476; SSE-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2477; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2478; 2479; AVX1-LABEL: test_mm_shuffle_ps: 2480; AVX1: # %bb.0: 2481; AVX1-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc1,0x00] 2482; AVX1-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2483; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2484; 2485; AVX512-LABEL: test_mm_shuffle_ps: 2486; AVX512: # %bb.0: 2487; AVX512-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x00] 2488; AVX512-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2489; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2490 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4> 2491 ret <4 x float> %res 2492} 2493 2494define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) { 2495; SSE-LABEL: test_mm_sqrt_ps: 2496; SSE: # %bb.0: 2497; SSE-NEXT: sqrtps %xmm0, %xmm0 # encoding: [0x0f,0x51,0xc0] 2498; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2499; 2500; AVX1-LABEL: test_mm_sqrt_ps: 2501; AVX1: # %bb.0: 2502; AVX1-NEXT: vsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x51,0xc0] 2503; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2504; 2505; AVX512-LABEL: test_mm_sqrt_ps: 2506; AVX512: # %bb.0: 2507; AVX512-NEXT: vsqrtps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0] 2508; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2509 %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0) 2510 ret <4 x float> %res 2511} 2512declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone 2513 2514define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) { 2515; SSE-LABEL: test_mm_sqrt_ss: 2516; SSE: # %bb.0: 2517; SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2518; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2519; 2520; AVX1-LABEL: test_mm_sqrt_ss: 2521; AVX1: # %bb.0: 2522; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2523; AVX1-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] 2524; 2525; AVX512-LABEL: test_mm_sqrt_ss: 2526; AVX512: # %bb.0: 2527; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2528; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2529 %ext = extractelement <4 x float> %a0, i32 0 2530 %sqrt = call float @llvm.sqrt.f32(float %ext) 2531 %ins = insertelement <4 x float> %a0, float %sqrt, i32 0 2532 ret <4 x float> %ins 2533} 2534declare float @llvm.sqrt.f32(float) nounwind readnone 2535 2536define float @test_mm_sqrt_ss_scalar(float %a0) { 2537; X86-SSE-LABEL: test_mm_sqrt_ss_scalar: 2538; X86-SSE: # %bb.0: 2539; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 2540; X86-SSE-NEXT: .cfi_def_cfa_offset 8 2541; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08] 2542; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2543; X86-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2544; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24] 2545; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2546; X86-SSE-NEXT: popl %eax # encoding: [0x58] 2547; X86-SSE-NEXT: .cfi_def_cfa_offset 4 2548; X86-SSE-NEXT: retl # encoding: [0xc3] 2549; 2550; X86-AVX1-LABEL: test_mm_sqrt_ss_scalar: 2551; X86-AVX1: # %bb.0: 2552; X86-AVX1-NEXT: pushl %eax # encoding: [0x50] 2553; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 2554; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 2555; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2556; X86-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2557; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24] 2558; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2559; X86-AVX1-NEXT: popl %eax # encoding: [0x58] 2560; X86-AVX1-NEXT: .cfi_def_cfa_offset 4 2561; X86-AVX1-NEXT: retl # encoding: [0xc3] 2562; 2563; X86-AVX512-LABEL: test_mm_sqrt_ss_scalar: 2564; X86-AVX512: # %bb.0: 2565; 
X86-AVX512-NEXT: pushl %eax # encoding: [0x50] 2566; X86-AVX512-NEXT: .cfi_def_cfa_offset 8 2567; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 2568; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2569; X86-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2570; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24] 2571; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2572; X86-AVX512-NEXT: popl %eax # encoding: [0x58] 2573; X86-AVX512-NEXT: .cfi_def_cfa_offset 4 2574; X86-AVX512-NEXT: retl # encoding: [0xc3] 2575; 2576; X64-SSE-LABEL: test_mm_sqrt_ss_scalar: 2577; X64-SSE: # %bb.0: 2578; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2579; X64-SSE-NEXT: retq # encoding: [0xc3] 2580; 2581; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar: 2582; X64-AVX1: # %bb.0: 2583; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2584; X64-AVX1-NEXT: retq # encoding: [0xc3] 2585; 2586; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar: 2587; X64-AVX512: # %bb.0: 2588; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2589; X64-AVX512-NEXT: retq # encoding: [0xc3] 2590 %sqrt = call float @llvm.sqrt.f32(float %a0) 2591 ret float %sqrt 2592} 2593 2594define void @test_mm_store_ps(float *%a0, <4 x float> %a1) { 2595; X86-SSE-LABEL: test_mm_store_ps: 2596; X86-SSE: # %bb.0: 2597; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2598; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2599; X86-SSE-NEXT: retl # encoding: [0xc3] 2600; 2601; X86-AVX1-LABEL: test_mm_store_ps: 2602; X86-AVX1: # %bb.0: 2603; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2604; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2605; X86-AVX1-NEXT: retl # 
encoding: [0xc3] 2606; 2607; X86-AVX512-LABEL: test_mm_store_ps: 2608; X86-AVX512: # %bb.0: 2609; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2610; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2611; X86-AVX512-NEXT: retl # encoding: [0xc3] 2612; 2613; X64-SSE-LABEL: test_mm_store_ps: 2614; X64-SSE: # %bb.0: 2615; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2616; X64-SSE-NEXT: retq # encoding: [0xc3] 2617; 2618; X64-AVX1-LABEL: test_mm_store_ps: 2619; X64-AVX1: # %bb.0: 2620; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2621; X64-AVX1-NEXT: retq # encoding: [0xc3] 2622; 2623; X64-AVX512-LABEL: test_mm_store_ps: 2624; X64-AVX512: # %bb.0: 2625; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2626; X64-AVX512-NEXT: retq # encoding: [0xc3] 2627 %arg0 = bitcast float* %a0 to <4 x float>* 2628 store <4 x float> %a1, <4 x float>* %arg0, align 16 2629 ret void 2630} 2631 2632define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) { 2633; X86-SSE-LABEL: test_mm_store_ps1: 2634; X86-SSE: # %bb.0: 2635; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2636; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2637; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2638; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2639; X86-SSE-NEXT: retl # encoding: [0xc3] 2640; 2641; X86-AVX1-LABEL: test_mm_store_ps1: 2642; X86-AVX1: # %bb.0: 2643; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2644; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2645; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2646; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2647; X86-AVX1-NEXT: retl # encoding: [0xc3] 2648; 2649; X86-AVX512-LABEL: test_mm_store_ps1: 2650; X86-AVX512: # %bb.0: 2651; 
X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2652; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2653; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2654; X86-AVX512-NEXT: retl # encoding: [0xc3] 2655; 2656; X64-SSE-LABEL: test_mm_store_ps1: 2657; X64-SSE: # %bb.0: 2658; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2659; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2660; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2661; X64-SSE-NEXT: retq # encoding: [0xc3] 2662; 2663; X64-AVX1-LABEL: test_mm_store_ps1: 2664; X64-AVX1: # %bb.0: 2665; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2666; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2667; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2668; X64-AVX1-NEXT: retq # encoding: [0xc3] 2669; 2670; X64-AVX512-LABEL: test_mm_store_ps1: 2671; X64-AVX512: # %bb.0: 2672; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2673; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2674; X64-AVX512-NEXT: retq # encoding: [0xc3] 2675 %arg0 = bitcast float* %a0 to <4 x float>* 2676 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2677 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2678 ret void 2679} 2680 2681define void @test_mm_store_ss(float *%a0, <4 x float> %a1) { 2682; X86-SSE-LABEL: test_mm_store_ss: 2683; X86-SSE: # %bb.0: 2684; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2685; X86-SSE-NEXT: movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00] 2686; X86-SSE-NEXT: retl # encoding: [0xc3] 2687; 2688; X86-AVX1-LABEL: test_mm_store_ss: 2689; X86-AVX1: # %bb.0: 2690; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] 2691; X86-AVX1-NEXT: vmovss %xmm0, (%eax) # encoding: [0xc5,0xfa,0x11,0x00] 2692; X86-AVX1-NEXT: retl # encoding: [0xc3] 2693; 2694; X86-AVX512-LABEL: test_mm_store_ss: 2695; X86-AVX512: # %bb.0: 2696; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2697; X86-AVX512-NEXT: vmovss %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x00] 2698; X86-AVX512-NEXT: retl # encoding: [0xc3] 2699; 2700; X64-SSE-LABEL: test_mm_store_ss: 2701; X64-SSE: # %bb.0: 2702; X64-SSE-NEXT: movss %xmm0, (%rdi) # encoding: [0xf3,0x0f,0x11,0x07] 2703; X64-SSE-NEXT: retq # encoding: [0xc3] 2704; 2705; X64-AVX1-LABEL: test_mm_store_ss: 2706; X64-AVX1: # %bb.0: 2707; X64-AVX1-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07] 2708; X64-AVX1-NEXT: retq # encoding: [0xc3] 2709; 2710; X64-AVX512-LABEL: test_mm_store_ss: 2711; X64-AVX512: # %bb.0: 2712; X64-AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07] 2713; X64-AVX512-NEXT: retq # encoding: [0xc3] 2714 %ext = extractelement <4 x float> %a1, i32 0 2715 store float %ext, float* %a0, align 1 2716 ret void 2717} 2718 2719define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) { 2720; X86-SSE-LABEL: test_mm_store1_ps: 2721; X86-SSE: # %bb.0: 2722; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2723; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2724; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2725; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2726; X86-SSE-NEXT: retl # encoding: [0xc3] 2727; 2728; X86-AVX1-LABEL: test_mm_store1_ps: 2729; X86-AVX1: # %bb.0: 2730; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2731; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2732; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2733; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2734; 
X86-AVX1-NEXT: retl # encoding: [0xc3] 2735; 2736; X86-AVX512-LABEL: test_mm_store1_ps: 2737; X86-AVX512: # %bb.0: 2738; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2739; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2740; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2741; X86-AVX512-NEXT: retl # encoding: [0xc3] 2742; 2743; X64-SSE-LABEL: test_mm_store1_ps: 2744; X64-SSE: # %bb.0: 2745; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2746; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2747; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2748; X64-SSE-NEXT: retq # encoding: [0xc3] 2749; 2750; X64-AVX1-LABEL: test_mm_store1_ps: 2751; X64-AVX1: # %bb.0: 2752; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2753; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2754; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2755; X64-AVX1-NEXT: retq # encoding: [0xc3] 2756; 2757; X64-AVX512-LABEL: test_mm_store1_ps: 2758; X64-AVX512: # %bb.0: 2759; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2760; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2761; X64-AVX512-NEXT: retq # encoding: [0xc3] 2762 %arg0 = bitcast float* %a0 to <4 x float>* 2763 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2764 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2765 ret void 2766} 2767 2768define void @test_mm_storeh_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { 2769; X86-SSE-LABEL: test_mm_storeh_ps: 2770; X86-SSE: # %bb.0: 2771; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 2772; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2773; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2774; X86-SSE-NEXT: subl 
$32, %esp # encoding: [0x83,0xec,0x20] 2775; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2776; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2777; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2778; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2779; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2780; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2781; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2782; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 2783; X86-SSE-NEXT: retl # encoding: [0xc3] 2784; 2785; X86-AVX1-LABEL: test_mm_storeh_ps: 2786; X86-AVX1: # %bb.0: 2787; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2788; X86-AVX1-NEXT: vmovhpd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x17,0x00] 2789; X86-AVX1-NEXT: retl # encoding: [0xc3] 2790; 2791; X86-AVX512-LABEL: test_mm_storeh_ps: 2792; X86-AVX512: # %bb.0: 2793; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2794; X86-AVX512-NEXT: vmovhpd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x17,0x00] 2795; X86-AVX512-NEXT: retl # encoding: [0xc3] 2796; 2797; X64-SSE-LABEL: test_mm_storeh_ps: 2798; X64-SSE: # %bb.0: 2799; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2800; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0] 2801; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2802; X64-SSE-NEXT: retq # encoding: [0xc3] 2803; 2804; X64-AVX1-LABEL: test_mm_storeh_ps: 2805; X64-AVX1: # %bb.0: 2806; X64-AVX1-NEXT: vpextrq $1, %xmm0, %rax # encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2807; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2808; X64-AVX1-NEXT: retq # encoding: [0xc3] 2809; 2810; X64-AVX512-LABEL: test_mm_storeh_ps: 2811; X64-AVX512: # %bb.0: 2812; X64-AVX512-NEXT: vpextrq $1, %xmm0, %rax # EVEX TO VEX Compression 
encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2813; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2814; X64-AVX512-NEXT: retq # encoding: [0xc3] 2815 %ptr = bitcast x86_mmx* %a0 to i64* 2816 %bc = bitcast <4 x float> %a1 to <2 x i64> 2817 %ext = extractelement <2 x i64> %bc, i32 1 2818 store i64 %ext, i64* %ptr 2819 ret void 2820} 2821 2822define void @test_mm_storel_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { 2823; X86-SSE-LABEL: test_mm_storel_ps: 2824; X86-SSE: # %bb.0: 2825; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 2826; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2827; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2828; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2829; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2830; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2831; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] 2832; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] 2833; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2834; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2835; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2836; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 2837; X86-SSE-NEXT: retl # encoding: [0xc3] 2838; 2839; X86-AVX1-LABEL: test_mm_storel_ps: 2840; X86-AVX1: # %bb.0: 2841; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2842; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 2843; X86-AVX1-NEXT: retl # encoding: [0xc3] 2844; 2845; X86-AVX512-LABEL: test_mm_storel_ps: 2846; X86-AVX512: # %bb.0: 2847; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2848; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 2849; X86-AVX512-NEXT: retl # encoding: [0xc3] 2850; 2851; X64-SSE-LABEL: test_mm_storel_ps: 2852; X64-SSE: # %bb.0: 2853; X64-SSE-NEXT: movaps %xmm0, 
-{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2854; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] 2855; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2856; X64-SSE-NEXT: retq # encoding: [0xc3] 2857; 2858; X64-AVX1-LABEL: test_mm_storel_ps: 2859; X64-AVX1: # %bb.0: 2860; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2861; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2862; X64-AVX1-NEXT: retq # encoding: [0xc3] 2863; 2864; X64-AVX512-LABEL: test_mm_storel_ps: 2865; X64-AVX512: # %bb.0: 2866; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2867; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2868; X64-AVX512-NEXT: retq # encoding: [0xc3] 2869 %ptr = bitcast x86_mmx* %a0 to i64* 2870 %bc = bitcast <4 x float> %a1 to <2 x i64> 2871 %ext = extractelement <2 x i64> %bc, i32 0 2872 store i64 %ext, i64* %ptr 2873 ret void 2874} 2875 2876define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) { 2877; X86-SSE-LABEL: test_mm_storer_ps: 2878; X86-SSE: # %bb.0: 2879; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2880; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2881; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2882; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2883; X86-SSE-NEXT: retl # encoding: [0xc3] 2884; 2885; X86-AVX1-LABEL: test_mm_storer_ps: 2886; X86-AVX1: # %bb.0: 2887; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2888; X86-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2889; X86-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2890; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2891; X86-AVX1-NEXT: retl # encoding: [0xc3] 2892; 2893; X86-AVX512-LABEL: test_mm_storer_ps: 2894; X86-AVX512: # %bb.0: 2895; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # 
encoding: [0x8b,0x44,0x24,0x04] 2896; X86-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2897; X86-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2898; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2899; X86-AVX512-NEXT: retl # encoding: [0xc3] 2900; 2901; X64-SSE-LABEL: test_mm_storer_ps: 2902; X64-SSE: # %bb.0: 2903; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2904; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2905; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2906; X64-SSE-NEXT: retq # encoding: [0xc3] 2907; 2908; X64-AVX1-LABEL: test_mm_storer_ps: 2909; X64-AVX1: # %bb.0: 2910; X64-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2911; X64-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2912; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2913; X64-AVX1-NEXT: retq # encoding: [0xc3] 2914; 2915; X64-AVX512-LABEL: test_mm_storer_ps: 2916; X64-AVX512: # %bb.0: 2917; X64-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2918; X64-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2919; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2920; X64-AVX512-NEXT: retq # encoding: [0xc3] 2921 %arg0 = bitcast float* %a0 to <4 x float>* 2922 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 2923 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2924 ret void 2925} 2926; test_mm_storeu_ps stores %a1 through %a0 with align 1, so every prefix expects an unaligned (v)movups store. 2927define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) { 2928; X86-SSE-LABEL: test_mm_storeu_ps: 2929; X86-SSE: # %bb.0: 2930; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2931; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 2932; X86-SSE-NEXT: retl # encoding: [0xc3] 2933; 2934; X86-AVX1-LABEL: test_mm_storeu_ps: 2935; X86-AVX1: # 
%bb.0: 2936; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2937; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 2938; X86-AVX1-NEXT: retl # encoding: [0xc3] 2939; 2940; X86-AVX512-LABEL: test_mm_storeu_ps: 2941; X86-AVX512: # %bb.0: 2942; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2943; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 2944; X86-AVX512-NEXT: retl # encoding: [0xc3] 2945; 2946; X64-SSE-LABEL: test_mm_storeu_ps: 2947; X64-SSE: # %bb.0: 2948; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 2949; X64-SSE-NEXT: retq # encoding: [0xc3] 2950; 2951; X64-AVX1-LABEL: test_mm_storeu_ps: 2952; X64-AVX1: # %bb.0: 2953; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 2954; X64-AVX1-NEXT: retq # encoding: [0xc3] 2955; 2956; X64-AVX512-LABEL: test_mm_storeu_ps: 2957; X64-AVX512: # %bb.0: 2958; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 2959; X64-AVX512-NEXT: retq # encoding: [0xc3] 2960 %arg0 = bitcast float* %a0 to <4 x float>* 2961 store <4 x float> %a1, <4 x float>* %arg0, align 1 2962 ret void 2963} 2964; test_mm_stream_ps performs an align 16 store tagged with !nontemporal !0, so every prefix expects a (v)movntps store. 2965define void @test_mm_stream_ps(float *%a0, <4 x float> %a1) { 2966; X86-SSE-LABEL: test_mm_stream_ps: 2967; X86-SSE: # %bb.0: 2968; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2969; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 2970; X86-SSE-NEXT: retl # encoding: [0xc3] 2971; 2972; X86-AVX1-LABEL: test_mm_stream_ps: 2973; X86-AVX1: # %bb.0: 2974; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2975; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 2976; X86-AVX1-NEXT: retl # encoding: [0xc3] 2977; 2978; X86-AVX512-LABEL: test_mm_stream_ps: 2979; X86-AVX512: # %bb.0: 2980; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] 2981; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 2982; X86-AVX512-NEXT: retl # encoding: [0xc3] 2983; 2984; X64-SSE-LABEL: test_mm_stream_ps: 2985; X64-SSE: # %bb.0: 2986; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 2987; X64-SSE-NEXT: retq # encoding: [0xc3] 2988; 2989; X64-AVX1-LABEL: test_mm_stream_ps: 2990; X64-AVX1: # %bb.0: 2991; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 2992; X64-AVX1-NEXT: retq # encoding: [0xc3] 2993; 2994; X64-AVX512-LABEL: test_mm_stream_ps: 2995; X64-AVX512: # %bb.0: 2996; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 2997; X64-AVX512-NEXT: retq # encoding: [0xc3] 2998 %arg0 = bitcast float* %a0 to <4 x float>* 2999 store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0 3000 ret void 3001} 3002; test_mm_sub_ps is a plain fsub on <4 x float>, which every prefix expects to become one (v)subps. 3003define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3004; SSE-LABEL: test_mm_sub_ps: 3005; SSE: # %bb.0: 3006; SSE-NEXT: subps %xmm1, %xmm0 # encoding: [0x0f,0x5c,0xc1] 3007; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3008; 3009; AVX1-LABEL: test_mm_sub_ps: 3010; AVX1: # %bb.0: 3011; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5c,0xc1] 3012; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3013; 3014; AVX512-LABEL: test_mm_sub_ps: 3015; AVX512: # %bb.0: 3016; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] 3017; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3018 %res = fsub <4 x float> %a0, %a1 3019 ret <4 x float> %res 3020} 3021; test_mm_sub_ss subtracts element 0 of %a1 from element 0 of %a0 and inserts the difference back into lane 0 of %a0, which every prefix expects to become one (v)subss. 3022define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3023; SSE-LABEL: test_mm_sub_ss: 3024; SSE: # %bb.0: 3025; SSE-NEXT: subss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5c,0xc1] 3026; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3027; 3028; AVX1-LABEL: test_mm_sub_ss: 3029; AVX1: # %bb.0: 3030; AVX1-NEXT: vsubss 
%xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5c,0xc1] 3031; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3032; 3033; AVX512-LABEL: test_mm_sub_ss: 3034; AVX512: # %bb.0: 3035; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1] 3036; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3037 %ext0 = extractelement <4 x float> %a0, i32 0 3038 %ext1 = extractelement <4 x float> %a1, i32 0 3039 %fsub = fsub float %ext0, %ext1 3040 %res = insertelement <4 x float> %a0, float %fsub, i32 0 3041 ret <4 x float> %res 3042} 3043; test_MM_TRANSPOSE4_PS loads four <4 x float> rows from %a0 through %a3, recombines them with eight shufflevectors and stores the four results back through the same pointers, with every load and store using align 16. 3044define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind { 3045; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3046; X86-SSE: # %bb.0: 3047; X86-SSE-NEXT: pushl %esi # encoding: [0x56] 3048; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3049; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3050; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3051; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3052; X86-SSE-NEXT: movaps (%esi), %xmm0 # encoding: [0x0f,0x28,0x06] 3053; X86-SSE-NEXT: movaps (%edx), %xmm1 # encoding: [0x0f,0x28,0x0a] 3054; X86-SSE-NEXT: movaps (%ecx), %xmm2 # encoding: [0x0f,0x28,0x11] 3055; X86-SSE-NEXT: movaps (%eax), %xmm3 # encoding: [0x0f,0x28,0x18] 3056; X86-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3057; X86-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3058; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3059; X86-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3060; X86-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3061; X86-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3062; X86-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3063; X86-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3064; X86-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: 
[0x0f,0x15,0xd3] 3065; X86-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3066; X86-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3067; X86-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3068; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3069; X86-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3070; X86-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3071; X86-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3072; X86-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3073; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3074; X86-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3075; X86-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3076; X86-SSE-NEXT: movaps %xmm1, (%esi) # encoding: [0x0f,0x29,0x0e] 3077; X86-SSE-NEXT: movaps %xmm5, (%edx) # encoding: [0x0f,0x29,0x2a] 3078; X86-SSE-NEXT: movaps %xmm3, (%ecx) # encoding: [0x0f,0x29,0x19] 3079; X86-SSE-NEXT: movaps %xmm2, (%eax) # encoding: [0x0f,0x29,0x10] 3080; X86-SSE-NEXT: popl %esi # encoding: [0x5e] 3081; X86-SSE-NEXT: retl # encoding: [0xc3] 3082; 3083; X86-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3084; X86-AVX1: # %bb.0: 3085; X86-AVX1-NEXT: pushl %esi # encoding: [0x56] 3086; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3087; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3088; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3089; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3090; X86-AVX1-NEXT: vmovaps (%esi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x06] 3091; X86-AVX1-NEXT: vmovaps (%edx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] 3092; X86-AVX1-NEXT: vmovaps (%ecx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x11] 3093; X86-AVX1-NEXT: vmovaps (%eax), %xmm3 # encoding: [0xc5,0xf8,0x28,0x18] 3094; X86-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3095; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3096; X86-AVX1-NEXT: vunpcklps 
%xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3097; X86-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3098; X86-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3099; X86-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3100; X86-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3101; X86-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3102; X86-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3103; X86-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3104; X86-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3105; X86-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3106; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3107; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3108; X86-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3109; X86-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3110; X86-AVX1-NEXT: vmovaps %xmm2, (%esi) # encoding: [0xc5,0xf8,0x29,0x16] 3111; X86-AVX1-NEXT: vmovaps %xmm3, (%edx) # encoding: [0xc5,0xf8,0x29,0x1a] 3112; X86-AVX1-NEXT: vmovaps %xmm4, (%ecx) # encoding: [0xc5,0xf8,0x29,0x21] 3113; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 3114; X86-AVX1-NEXT: popl %esi # encoding: [0x5e] 3115; X86-AVX1-NEXT: retl # encoding: [0xc3] 3116; 3117; X86-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3118; X86-AVX512: # %bb.0: 3119; X86-AVX512-NEXT: pushl %esi # encoding: [0x56] 3120; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3121; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3122; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3123; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3124; X86-AVX512-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06] 3125; X86-AVX512-NEXT: vmovaps (%edx), %xmm1 # EVEX TO VEX Compression encoding: 
[0xc5,0xf8,0x28,0x0a] 3126; X86-AVX512-NEXT: vmovaps (%ecx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x11] 3127; X86-AVX512-NEXT: vmovaps (%eax), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x18] 3128; X86-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3129; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3130; X86-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3131; X86-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3132; X86-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3133; X86-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3134; X86-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3135; X86-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3136; X86-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3137; X86-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3138; X86-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3139; X86-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3140; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3141; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3142; X86-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3143; X86-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3144; X86-AVX512-NEXT: vmovaps %xmm2, (%esi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x16] 3145; X86-AVX512-NEXT: vmovaps %xmm3, (%edx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1a] 3146; X86-AVX512-NEXT: vmovaps %xmm4, (%ecx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x21] 3147; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 3148; X86-AVX512-NEXT: 
popl %esi # encoding: [0x5e] 3149; X86-AVX512-NEXT: retl # encoding: [0xc3] 3150; 3151; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3152; X64-SSE: # %bb.0: 3153; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 3154; X64-SSE-NEXT: movaps (%rsi), %xmm1 # encoding: [0x0f,0x28,0x0e] 3155; X64-SSE-NEXT: movaps (%rdx), %xmm2 # encoding: [0x0f,0x28,0x12] 3156; X64-SSE-NEXT: movaps (%rcx), %xmm3 # encoding: [0x0f,0x28,0x19] 3157; X64-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3158; X64-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3159; X64-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3160; X64-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3161; X64-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3162; X64-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3163; X64-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3164; X64-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3165; X64-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3166; X64-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3167; X64-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3168; X64-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3169; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3170; X64-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3171; X64-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3172; X64-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3173; X64-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3174; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3175; X64-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3176; X64-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3177; X64-SSE-NEXT: movaps %xmm1, (%rdi) # encoding: [0x0f,0x29,0x0f] 3178; X64-SSE-NEXT: movaps %xmm5, (%rsi) # encoding: [0x0f,0x29,0x2e] 3179; X64-SSE-NEXT: movaps %xmm3, (%rdx) # encoding: [0x0f,0x29,0x1a] 3180; X64-SSE-NEXT: movaps %xmm2, (%rcx) # encoding: [0x0f,0x29,0x11] 3181; X64-SSE-NEXT: retq 
# encoding: [0xc3] 3182; 3183; X64-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3184; X64-AVX1: # %bb.0: 3185; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 3186; X64-AVX1-NEXT: vmovaps (%rsi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0e] 3187; X64-AVX1-NEXT: vmovaps (%rdx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x12] 3188; X64-AVX1-NEXT: vmovaps (%rcx), %xmm3 # encoding: [0xc5,0xf8,0x28,0x19] 3189; X64-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3190; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3191; X64-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3192; X64-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3193; X64-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3194; X64-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3195; X64-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3196; X64-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3197; X64-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3198; X64-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3199; X64-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3200; X64-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3201; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3202; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3203; X64-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3204; X64-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3205; X64-AVX1-NEXT: vmovaps %xmm2, (%rdi) # encoding: [0xc5,0xf8,0x29,0x17] 3206; X64-AVX1-NEXT: vmovaps %xmm3, (%rsi) # encoding: [0xc5,0xf8,0x29,0x1e] 3207; X64-AVX1-NEXT: vmovaps %xmm4, (%rdx) # encoding: [0xc5,0xf8,0x29,0x22] 3208; X64-AVX1-NEXT: vmovaps %xmm0, (%rcx) # encoding: [0xc5,0xf8,0x29,0x01] 3209; X64-AVX1-NEXT: retq # encoding: [0xc3] 3210; 3211; X64-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3212; X64-AVX512: # %bb.0: 3213; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX 
TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 3214; X64-AVX512-NEXT: vmovaps (%rsi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0e] 3215; X64-AVX512-NEXT: vmovaps (%rdx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x12] 3216; X64-AVX512-NEXT: vmovaps (%rcx), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x19] 3217; X64-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3218; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3219; X64-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3220; X64-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3221; X64-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3222; X64-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3223; X64-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3224; X64-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3225; X64-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3226; X64-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3227; X64-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3228; X64-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3229; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3230; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3231; X64-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3232; X64-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3233; X64-AVX512-NEXT: vmovaps %xmm2, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x17] 3234; X64-AVX512-NEXT: vmovaps %xmm3, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1e] 3235; X64-AVX512-NEXT: vmovaps %xmm4, (%rdx) # EVEX TO VEX Compression encoding: 
[0xc5,0xf8,0x29,0x22] 3236; X64-AVX512-NEXT: vmovaps %xmm0, (%rcx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x01] 3237; X64-AVX512-NEXT: retq # encoding: [0xc3] 3238 %row0 = load <4 x float>, <4 x float>* %a0, align 16 3239 %row1 = load <4 x float>, <4 x float>* %a1, align 16 3240 %row2 = load <4 x float>, <4 x float>* %a2, align 16 3241 %row3 = load <4 x float>, <4 x float>* %a3, align 16 3242 %tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3243 %tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3244 %tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3245 %tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3246 %res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3247 %res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3248 %res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3249 %res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3250 store <4 x float> %res0, <4 x float>* %a0, align 16 3251 store <4 x float> %res1, <4 x float>* %a1, align 16 3252 store <4 x float> %res2, <4 x float>* %a2, align 16 3253 store <4 x float> %res3, <4 x float>* %a3, align 16 3254 ret void 3255} 3256; test_mm_ucomieq_ss calls @llvm.x86.sse.ucomieq.ss, and the expected code runs (v)ucomiss %xmm1, %xmm0, combines setnp and sete with andb and zero-extends the result into eax. 3257define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3258; SSE-LABEL: test_mm_ucomieq_ss: 3259; SSE: # %bb.0: 3260; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3261; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3262; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3263; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3264; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3265; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3266; 3267; AVX1-LABEL: 
test_mm_ucomieq_ss: 3268; AVX1: # %bb.0: 3269; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3270; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3271; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3272; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3273; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3274; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3275; 3276; AVX512-LABEL: test_mm_ucomieq_ss: 3277; AVX512: # %bb.0: 3278; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3279; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3280; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3281; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3282; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3283; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3284 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 3285 ret i32 %res 3286} 3287declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 3288; test_mm_ucomige_ss calls @llvm.x86.sse.ucomige.ss, and the expected code zeroes eax, runs (v)ucomiss %xmm1, %xmm0 and sets al with setae. 3289define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3290; SSE-LABEL: test_mm_ucomige_ss: 3291; SSE: # %bb.0: 3292; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3293; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3294; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3295; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3296; 3297; AVX1-LABEL: test_mm_ucomige_ss: 3298; AVX1: # %bb.0: 3299; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3300; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3301; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3302; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3303; 3304; AVX512-LABEL: test_mm_ucomige_ss: 3305; AVX512: # %bb.0: 3306; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3307; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3308; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3309; 
AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3310 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) 3311 ret i32 %res 3312} 3313declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone 3314; test_mm_ucomigt_ss calls @llvm.x86.sse.ucomigt.ss, and the expected code zeroes eax, runs (v)ucomiss %xmm1, %xmm0 and sets al with seta. 3315define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3316; SSE-LABEL: test_mm_ucomigt_ss: 3317; SSE: # %bb.0: 3318; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3319; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3320; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3321; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3322; 3323; AVX1-LABEL: test_mm_ucomigt_ss: 3324; AVX1: # %bb.0: 3325; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3326; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3327; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3328; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3329; 3330; AVX512-LABEL: test_mm_ucomigt_ss: 3331; AVX512: # %bb.0: 3332; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3333; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3334; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3335; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3336 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) 3337 ret i32 %res 3338} 3339declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone 3340; test_mm_ucomile_ss calls @llvm.x86.sse.ucomile.ss, and the expected code zeroes eax, swaps the compare operands to (v)ucomiss %xmm0, %xmm1 and sets al with setae. 3341define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3342; SSE-LABEL: test_mm_ucomile_ss: 3343; SSE: # %bb.0: 3344; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3345; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3346; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3347; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3348; 3349; AVX1-LABEL: test_mm_ucomile_ss: 3350; AVX1: # %bb.0: 3351; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3352; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3353; 
AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3354; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3355; 3356; AVX512-LABEL: test_mm_ucomile_ss: 3357; AVX512: # %bb.0: 3358; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3359; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3360; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3361; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3362 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) 3363 ret i32 %res 3364} 3365declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone 3366; test_mm_ucomilt_ss calls @llvm.x86.sse.ucomilt.ss, and the expected code zeroes eax, swaps the compare operands to (v)ucomiss %xmm0, %xmm1 and sets al with seta. 3367define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3368; SSE-LABEL: test_mm_ucomilt_ss: 3369; SSE: # %bb.0: 3370; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3371; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3372; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3373; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3374; 3375; AVX1-LABEL: test_mm_ucomilt_ss: 3376; AVX1: # %bb.0: 3377; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3378; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3379; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3380; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3381; 3382; AVX512-LABEL: test_mm_ucomilt_ss: 3383; AVX512: # %bb.0: 3384; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3385; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3386; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3387; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3388 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) 3389 ret i32 %res 3390} 3391declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone 3392; test_mm_ucomineq_ss calls @llvm.x86.sse.ucomineq.ss, and the expected code runs (v)ucomiss %xmm1, %xmm0, combines setp and setne with orb and zero-extends the result into eax. 3393define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3394; SSE-LABEL: test_mm_ucomineq_ss: 3395; SSE: # %bb.0: 3396; SSE-NEXT: ucomiss %xmm1, %xmm0 # 
encoding: [0x0f,0x2e,0xc1] 3397; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3398; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3399; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3400; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3401; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3402; 3403; AVX1-LABEL: test_mm_ucomineq_ss: 3404; AVX1: # %bb.0: 3405; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3406; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3407; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3408; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3409; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3410; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3411; 3412; AVX512-LABEL: test_mm_ucomineq_ss: 3413; AVX512: # %bb.0: 3414; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3415; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3416; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3417; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3418; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3419; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3420 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) 3421 ret i32 %res 3422} 3423declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone 3424; test_mm_undefined_ps returns undef, so the only instruction expected under the shared CHECK prefix is the return. 3425define <4 x float> @test_mm_undefined_ps() { 3426; CHECK-LABEL: test_mm_undefined_ps: 3427; CHECK: # %bb.0: 3428; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3429 ret <4 x float> undef 3430} 3431; test_mm_unpackhi_ps uses a shufflevector mask of 2,6,3,7 to interleave the upper two elements of %a0 and %a1, which every prefix expects to become one (v)unpckhps. 3432define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3433; SSE-LABEL: test_mm_unpackhi_ps: 3434; SSE: # %bb.0: 3435; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3436; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3437; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3438; 3439; AVX1-LABEL: test_mm_unpackhi_ps: 3440; AVX1: # %bb.0: 3441; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 
# encoding: [0xc5,0xf8,0x15,0xc1] 3442; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3443; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3444; 3445; AVX512-LABEL: test_mm_unpackhi_ps: 3446; AVX512: # %bb.0: 3447; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3448; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3449; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3450 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3451 ret <4 x float> %res 3452} 3453; test_mm_unpacklo_ps uses a shufflevector mask of 0,4,1,5 to interleave the lower two elements of %a0 and %a1, which every prefix expects to become one (v)unpcklps. 3454define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3455; SSE-LABEL: test_mm_unpacklo_ps: 3456; SSE: # %bb.0: 3457; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 3458; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3459; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3460; 3461; AVX1-LABEL: test_mm_unpacklo_ps: 3462; AVX1: # %bb.0: 3463; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1] 3464; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3465; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3466; 3467; AVX512-LABEL: test_mm_unpacklo_ps: 3468; AVX512: # %bb.0: 3469; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] 3470; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3471; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3472 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3473 ret <4 x float> %res 3474} 3475; test_mm_xor_ps bitcasts both operands to <4 x i32>, xors them and bitcasts the result back to <4 x float>, which every prefix expects to become one (v)xorps. 3476define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3477; SSE-LABEL: test_mm_xor_ps: 3478; SSE: # %bb.0: 3479; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1] 3480; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3481; 3482; AVX1-LABEL: test_mm_xor_ps: 3483; AVX1: # %bb.0: 3484; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] 3485; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 
3486; 3487; AVX512-LABEL: test_mm_xor_ps: 3488; AVX512: # %bb.0: 3489; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] 3490; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3491 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 3492 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 3493 %res = xor <4 x i32> %arg0, %arg1 3494 %bc = bitcast <4 x i32> %res to <4 x float> 3495 ret <4 x float> %bc 3496} 3497; !0 is the metadata node attached as !nontemporal !0 to the store in test_mm_stream_ps. 3498!0 = !{i32 1} 3499