; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
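
; The RUN lines above cover a 2x3 configuration matrix: 32-bit (X86) and 64-bit
; (X64) targets, each built with plain SSE2, with AVX, and with AVX512
; (F+BW+DQ+VL). Checks shared by every configuration use the CHECK prefix;
; narrower prefixes (e.g. X86-SSE, AVX512) pin down output that differs per
; target or ISA level.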

define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi8:
; SSE: # %bb.0:
; SSE-NEXT: paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = add <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi16:
; SSE: # %bb.0:
; SSE-NEXT: paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = add <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi32:
; SSE: # %bb.0:
; SSE-NEXT: paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = add <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_add_epi64:
; SSE: # %bb.0:
; SSE-NEXT: paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = add <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_pd:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_add_sd:
; SSE: # %bb.0:
; SSE-NEXT: addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fadd = fadd double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fadd, i32 0
  ret <2 x double> %res
}
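
; Saturating adds are expressed through the target-specific intrinsics
; (llvm.x86.sse2.padds.* / llvm.x86.sse2.paddus.*) rather than generic IR, and
; lower 1:1 to padds/paddus on every configuration tested here.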
define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi8:
; SSE: # %bb.0:
; SSE-NEXT: paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epi16:
; SSE: # %bb.0:
; SSE-NEXT: paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu8:
; SSE: # %bb.0:
; SSE-NEXT: paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_adds_epu16:
; SSE: # %bb.0:
; SSE-NEXT: paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_adds_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_adds_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
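
; Note that bitwise logic on <2 x double> compiles to the packed-single forms
; (andps/andnps) on every configuration: the operation is purely bitwise, so
; any of the pand/andpd/andps encodings is legal, and the ps form is presumably
; preferred for its shorter encoding.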
define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_and_pd:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_and_si128:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = and <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_pd:
; SSE: # %bb.0:
; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_si128:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
  %res = and <2 x i64> %not, %a1
  ret <2 x i64> %res
}
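
; _mm_avg_epu8/_mm_avg_epu16 are written out as the generic averaging idiom,
; trunc(((zext(a) + zext(b)) + 1) >> 1). The SSE and AVX1 runs re-match the
; idiom to a single pavgb/pavgw, while the AVX512 run shown here instead
; expands it literally in 256-bit registers and narrows with vpmovwb/vpmovdw.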
define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
; AVX512-NEXT: # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512-NEXT: vpmovzxbw %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc9]
; AVX512-NEXT: # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x01]
; AVX512-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %zext0 = zext <16 x i8> %arg0 to <16 x i16>
  %zext1 = zext <16 x i8> %arg1 to <16 x i16>
  %add = add <16 x i16> %zext0, %zext1
  %add1 = add <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %lshr = lshr <16 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = trunc <16 x i16> %lshr to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_avg_epu16:
; SSE: # %bb.0:
; SSE-NEXT: pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_avg_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_avg_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc0]
; AVX512-NEXT: # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: vpmovzxwd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc9]
; AVX512-NEXT: # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
; AVX512-NEXT: vpsrld $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x01]
; AVX512-NEXT: vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %zext0 = zext <8 x i16> %arg0 to <8 x i32>
  %zext1 = zext <8 x i16> %arg1 to <8 x i32>
  %add = add <8 x i32> %zext0, %zext1
  %add1 = add <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %lshr = lshr <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %res = trunc <8 x i32> %lshr to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bslli_si128:
; SSE: # %bb.0:
; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bslli_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bslli_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_bsrli_si128:
; SSE: # %bb.0:
; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_bsrli_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_bsrli_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
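
; The _mm_cast* intrinsics below are pure bitcasts between 128-bit vector
; types, so every configuration compiles each of them to just a return.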
define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x double> %a0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
; CHECK-LABEL: test_mm_castpd_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x double> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <4 x float> %a0 to <2 x double>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
; CHECK-LABEL: test_mm_castps_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <4 x float> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x i64> %a0 to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
; CHECK-LABEL: test_mm_castsi128_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = bitcast <2 x i64> %a0 to <4 x float>
  ret <4 x float> %res
}

define void @test_mm_clflush(i8* %a0) nounwind {
; X86-LABEL: test_mm_clflush:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: clflush (%eax) # encoding: [0x0f,0xae,0x38]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_clflush:
; X64: # %bb.0:
; X64-NEXT: clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.sse2.clflush(i8* %a0)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
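
; With AVX512BW/VL available, the integer compares produce a mask register
; (vpcmpeq*/vpcmpgt* into %k0) and the vector result expected by the intrinsic
; is then rematerialized with vpmovm2b/w/d; the SSE and AVX1 runs produce the
; vector mask directly.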
define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp eq <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp eq <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp eq <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp oeq <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
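
; cmppd/cmpsd only encode the lt/le predicates (plus their negations), so
; _mm_cmpge_* and _mm_cmpgt_* are implemented by swapping the operands of a
; cmple/cmplt compare, with an extra move (or blend) to put the result back in
; the expected register for the scalar forms.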
define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ole <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ole <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg1, %arg0
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg1, %arg0
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg1, %arg0
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp olt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}
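
; The negated predicates are unordered compares: cmpneq/cmpnlt/cmpnle return
; true when either operand is NaN, which is why the IR below uses fcmp
; une/ugt/uge rather than the ordered predicates.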
define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp une <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ugt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uge <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
; AVX-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ugt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uge <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp ord <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_pd:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %fcmp = fcmp uno <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_sd:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_sd:
; AVX: # %bb.0:
; AVX-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
  ret <2 x double> %res
}
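
; comisd sets ZF/PF/CF, and an unordered result sets all three, so the eq/neq
; forms below fold in a parity check: setnp+sete+and for comieq, and
; setp+setne+or for comineq. The ordered ge/gt/le/lt forms get by with a
; single setae/seta after arranging the comisd operand order.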
define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_sd:
; SSE: # %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comige_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comile_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_sd:
; SSE: # %bb.0:
; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
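
; cvtdq2pd only reads the low two i32 elements of its source, matching the
; shufflevector+sitofp of elements 0 and 1 in the IR below.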
define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_pd:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
  %res = sitofp <2 x i32> %ext to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtepi32_ps:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtepi32_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtepi32_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = sitofp <4 x i32> %arg0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtpd_ps:
; SSE: # %bb.0:
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtpd_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtpd_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtps_pd:
; SSE: # %bb.0:
; SSE-NEXT: cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtps_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtps_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
  %res = fpext <2 x float> %ext to <2 x double>
  ret <2 x double> %res
}

define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsd_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp # encoding: [0x55]
; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-SSE-NEXT: movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24]
; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-SSE-NEXT: popl %ebp # encoding: [0x5d]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsd_f64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55]
; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX1-NEXT: vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsd_f64:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55]
; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8]
; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
; X86-AVX512-NEXT: vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24]
; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24]
; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtsd_f64:
; X64: # %bb.0:
; X64-NEXT: retq # encoding: [0xc3]
  %res = extractelement <2 x double> %a0, i32 0
  ret double %res
}
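
; NOTE: On x86-64 a double is returned in xmm0, so test_mm_cvtsd_f64 above is
; a no-op there; on i386 it is returned in st(0), hence the spill to an
; 8-byte-aligned stack slot followed by fldl.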

define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsd_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsd_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsd_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_cvtsd_ss:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cvtsd_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
; X86-SSE-LABEL: test_mm_cvtsd_ss_load:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_cvtsd_ss_load:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsd_ss_load:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_cvtsd_ss_load:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %p1
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}

define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_cvtsi128_si32:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtsi128_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtsi128_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = extractelement <4 x i32> %arg0, i32 0
  ret i32 %res
}

define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsi2sdl %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_sd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_sd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %cvt = sitofp i32 %a1 to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
  %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
  %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cvtss_sd:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvtss_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvtss_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext = extractelement <4 x float> %a1, i32 0
  %cvt = fpext float %ext to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttpd_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttpd_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttpd_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttps_epi32:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttps_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttps_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone

define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_cvttsd_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvttsd_si32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvttsd_si32:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_pd:
; SSE: # %bb.0:
; SSE-NEXT: divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fdiv <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_div_sd:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_div_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_div_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fdiv = fdiv double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fdiv, i32 0
  ret <2 x double> %res
}

define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_extract_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01]
; SSE-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_extract_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX1-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_extract_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01]
; AVX512-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext = extractelement <8 x i16> %arg0, i32 1
  %res = zext i16 %ext to i32
  ret i32 %res
}

define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
; X86-SSE-LABEL: test_mm_insert_epi16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT: pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_insert_epi16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_insert_epi16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_insert_epi16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_insert_epi16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_insert_epi16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = insertelement <8 x i16> %arg0, i16 %a1, i32 1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define void @test_mm_lfence() nounwind {
; CHECK-LABEL: test_mm_lfence:
; CHECK: # %bb.0:
; CHECK-NEXT: lfence # encoding: [0x0f,0xae,0xe8]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind readnone

define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %res = load <2 x double>, <2 x double>* %arg0, align 16
  ret <2 x double> %res
}

define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_sd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_sd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ld = load double, double* %a0, align 1
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res = load <2 x i64>, <2 x i64>* %a0, align 16
  ret <2 x i64> %res
}

define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_load1_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_load1_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovddup (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_load1_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_load1_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_load1_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ld = load double, double* %a0, align 8
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double %ld, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadh_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movhpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x16,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadh_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadh_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadh_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movhpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x16,0x07]
; X64-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadh_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadh_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ld = load double, double* %a1, align 8
  %res = insertelement <2 x double> %a0, double %ld, i32 1
  ret <2 x double> %res
}

define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_epi64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_epi64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_epi64:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_epi64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_epi64:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_epi64:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %bc = bitcast <2 x i64>* %a1 to i64*
  %ld = load i64, i64* %bc, align 1
  %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
; X86-SSE-LABEL: test_mm_loadl_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movlpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x12,0x00]
; X86-SSE-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadl_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadl_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00]
; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadl_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x12,0x07]
; X64-SSE-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadl_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadl_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07]
; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %ld = load double, double* %a1, align 8
  %res = insertelement <2 x double> %a0, double %ld, i32 0
  ret <2 x double> %res
}

define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X86-SSE-NEXT: # xmm0 = xmm0[1,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadr_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX1-NEXT: # xmm0 = mem[1,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadr_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01]
; X86-AVX512-NEXT: # xmm0 = mem[1,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadr_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X64-SSE-NEXT: # xmm0 = xmm0[1,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadr_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX1-NEXT: # xmm0 = mem[1,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadr_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01]
; X64-AVX512-NEXT: # xmm0 = mem[1,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %ld = load <2 x double>, <2 x double>* %arg0, align 16
  %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}

define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast double* %a0 to <2 x double>*
  %res = load <2 x double>, <2 x double>* %arg0, align 1
  ret <2 x double> %res
}

define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadu_si128:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_loadu_si128:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_loadu_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadu_si128:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_loadu_si128:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res = load <2 x i64>, <2 x i64>* %a0, align 1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_madd_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_madd_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_madd_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
; X86-SSE-LABEL: test_mm_maskmoveu_si128:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %edi # encoding: [0x57]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X86-SSE-NEXT: popl %edi # encoding: [0x5f]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_maskmoveu_si128:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %edi # encoding: [0x57]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08]
; X86-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X86-AVX-NEXT: popl %edi # encoding: [0x5f]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_maskmoveu_si128:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_maskmoveu_si128:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
  ret void
}
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind

define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_max_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_max_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp ugt <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_max_pd:
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_max_sd:
; SSE: # %bb.0:
; SSE-NEXT: maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_max_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_max_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define void @test_mm_mfence() nounwind {
; CHECK-LABEL: test_mm_mfence:
; CHECK: # %bb.0:
; CHECK-NEXT: mfence # encoding: [0x0f,0xae,0xf0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind readnone

define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp slt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_min_epu8:
; SSE: # %bb.0:
; SSE-NEXT: pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp ult <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_min_pd:
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5d,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_min_sd:
; SSE: # %bb.0:
; SSE-NEXT: minsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5d,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_min_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5d,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_min_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_move_epi64:
; SSE: # %bb.0:
; SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; SSE-NEXT: # xmm0 = xmm0[0],zero
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_move_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; AVX1-NEXT: # xmm0 = xmm0[0],zero
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_move_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; AVX512-NEXT: # xmm0 = xmm0[0],zero
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_move_sd:
; SSE: # %bb.0:
; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_move_sd:
; AVX: # %bb.0:
; AVX-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; AVX-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a1, i32 0
  %res0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
  ret <2 x double> %res1
}

define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pmovmskb %xmm0, %eax # encoding: [0x66,0x0f,0xd7,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_epi8:
; AVX: # %bb.0:
; AVX-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_pd:
; SSE: # %bb.0:
; SSE-NEXT: movmskpd %xmm0, %eax # encoding: [0x66,0x0f,0x50,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_movemask_pd:
; AVX: # %bb.0:
; AVX-NEXT: vmovmskpd %xmm0, %eax # encoding: [0xc5,0xf9,0x50,0xc0]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X86-SSE-LABEL: test_mm_mul_epu32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X86-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X86-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X86-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X86-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_epu32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2]
; AVX1-NEXT: vpblendw $204, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc2,0xcc]
; AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpblendw $204, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xcc]
; AVX1-NEXT: # xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_epu32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; AVX512-NEXT: vpblendd $10, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc2,0x0a]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512-NEXT: vpblendd $10, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x02,0xca,0x0a]
; AVX512-NEXT: # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_mul_epu32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; X64-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; X64-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2]
; X64-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca]
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
  %A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295>
  %B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295>
  %res = mul nuw <2 x i64> %A, %B
  ret <2 x i64> %res
}
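
; NOTE: In test_mm_mul_epu32 above, AVX512DQ can use vpmullq in place of
; pmuludq: once the odd 32-bit lanes are blended to zero, the low 64 bits of
; the 64x64 product equal the full 32x32 zero-extended product.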

define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_mul_pd:
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x59,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x59,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x59,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fmul <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_mul_sd:
; SSE: # %bb.0:
; SSE-NEXT: mulsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x59,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mul_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x59,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mul_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fmul = fmul double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fmul, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhi_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe5,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mulhi_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mulhi_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mulhi_epu16:
; SSE: # %bb.0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe4,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mulhi_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe4,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mulhi_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]

define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mullo_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmullw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd5,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_mullo_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd5,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_mullo_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = mul <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_or_pd:
; SSE: # %bb.0:
; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_or_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_or_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = or <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_or_si128:
; SSE: # %bb.0:
; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_or_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_or_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = or <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packs_epi16:
; SSE: # %bb.0:
; SSE-NEXT: packsswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x63,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packs_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x63,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packs_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packs_epi32:
; SSE: # %bb.0:
; SSE-NEXT: packssdw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6b,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packs_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x6b,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packs_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packus_epi16:
; SSE: # %bb.0:
; SSE-NEXT: packuswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x67,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_packus_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x67,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_packus_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
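; Editor note: the three pack tests above saturate rather than truncate:
; packsswb/packssdw clamp each source element to the signed range of the
; narrower type ([-128,127] and [-32768,32767]), while packuswb clamps its
; signed i16 inputs to unsigned [0,255], e.g. i16 300 -> i8 255, i16 -5 -> i8 0.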

define void @test_mm_pause() nounwind {
; CHECK-LABEL: test_mm_pause:
; CHECK: # %bb.0:
; CHECK-NEXT: pause # encoding: [0xf3,0x90]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind readnone

define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sad_epu8:
; SSE: # %bb.0:
; SSE-NEXT: psadbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_sad_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_sad_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <2 x i64>
@llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1) 2908 ret <2 x i64> %res 2909} 2910declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone 2911 2912define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { 2913; X86-SSE-LABEL: test_mm_set_epi8: 2914; X86-SSE: # %bb.0: 2915; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2916; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2917; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 2918; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 2919; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 2920; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2921; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 2922; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2923; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 2924; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 2925; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 2926; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2927; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 2928; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2929; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 2930; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2931; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 2932; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 2933; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 2934; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2935; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 2936; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2937; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 2938; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 2939; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 2940; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2941; X86-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 2942; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2943; X86-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 2944; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 2945; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 2946; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2947; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 2948; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 2949; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 2950; X86-SSE-NEXT: # xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2951; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 2952; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2953; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 2954; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 2955; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 2956; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2957; X86-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 2958; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 2959; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 2960; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2961; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 2962; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 2963; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 2964; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2965; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 2966; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 2967; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 2968; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 2969; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 2970; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 2971; X86-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 2972; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2973; X86-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 2974; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2975; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 2976; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2977; X86-SSE-NEXT: retl # encoding: [0xc3] 2978; 2979; X86-AVX1-LABEL: test_mm_set_epi8: 2980; X86-AVX1: # %bb.0: 2981; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 2982; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40] 2983; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] 2984; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 2985; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 2986; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 2987; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 2988; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 2989; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 2990; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 2991; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 2992; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 
# encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 2993; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 2994; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 2995; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 2996; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 2997; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 2998; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 2999; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 3000; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3001; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3002; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3003; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 3004; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3005; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3006; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3007; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3008; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3009; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3010; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3011; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3012; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3013; X86-AVX1-NEXT: retl # encoding: [0xc3] 3014; 3015; X86-AVX512-LABEL: test_mm_set_epi8: 3016; X86-AVX512: # %bb.0: 3017; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] 3018; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40] 3019; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 3020; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] 3021; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3022; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3023; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] 3024; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3025; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3026; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3027; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] 3028; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3029; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3030; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3031; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), 
%eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] 3032; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3033; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3034; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3035; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] 3036; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3037; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3038; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3039; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] 3040; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3041; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3042; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3043; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] 3044; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3045; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3046; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3047; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3048; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3049; X86-AVX512-NEXT: retl # encoding: [0xc3] 3050; 3051; X64-SSE-LABEL: test_mm_set_epi8: 3052; X64-SSE: # %bb.0: 3053; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3054; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3055; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3056; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3057; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3058; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3059; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3060; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3061; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3062; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3063; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3064; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3065; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 3066; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3067; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3068; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3069; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3070; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3071; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: 
[0x66,0x0f,0x60,0xd8] 3072; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3073; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3074; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3075; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3076; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3077; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] 3078; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3079; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] 3080; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3081; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] 3082; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3083; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3084; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3085; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3086; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3087; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3088; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3089; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3090; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3091; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3092; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3093; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] 3094; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3095; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] 3096; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 3097; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3098; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3099; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3100; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3101; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 3102; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3103; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] 3104; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 3105; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3106; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3107; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] 3108; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 3109; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] 3110; X64-SSE-NEXT: # xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 3111; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] 3112; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3113; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3114; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3115; X64-SSE-NEXT: retq # encoding: [0xc3] 3116; 3117; X64-AVX1-LABEL: test_mm_set_epi8: 3118; X64-AVX1: # %bb.0: 3119; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48] 3120; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3121; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3122; X64-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] 3123; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3124; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3125; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3126; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3127; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3128; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3129; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3130; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3131; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3132; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3133; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3134; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3135; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3136; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3137; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3138; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3139; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3140; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3141; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3142; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3143; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3144; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3145; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3146; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3147; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3148; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3149; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3150; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3151; X64-AVX1-NEXT: retq # encoding: [0xc3] 3152; 3153; X64-AVX512-LABEL: test_mm_set_epi8: 3154; X64-AVX512: # %bb.0: 3155; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48] 3156; X64-AVX512-NEXT: movzbl 
{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] 3157; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3158; X64-AVX512-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] 3159; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] 3160; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 3161; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] 3162; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] 3163; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] 3164; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 3165; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] 3166; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 3167; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] 3168; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 3169; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] 3170; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] 3171; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] 3172; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 3173; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3174; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] 3175; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] 3176; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 3177; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] 3178; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] 3179; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3180; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 3181; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] 3182; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] 3183; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] 3184; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 3185; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] 3186; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] 3187; X64-AVX512-NEXT: retq # encoding: [0xc3] 3188 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 3189 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 3190 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2 3191 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3 3192 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4 3193 %res5 = insertelement <16 x i8> 
%res4, i8 %a10, i32 5 3194 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6 3195 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7 3196 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8 3197 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9 3198 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10 3199 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11 3200 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12 3201 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13 3202 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14 3203 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15 3204 %res = bitcast <16 x i8> %res15 to <2 x i64> 3205 ret <2 x i64> %res 3206} 3207 3208define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { 3209; X86-SSE-LABEL: test_mm_set_epi16: 3210; X86-SSE: # %bb.0: 3211; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3212; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] 3213; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3214; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] 3215; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3216; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3217; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3218; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] 3219; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3220; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8] 3221; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3222; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0] 3223; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3224; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8] 3225; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3226; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] 3227; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] 3228; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 3229; X86-SSE-NEXT: punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3] 3230; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 3231; X86-SSE-NEXT: punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2] 3232; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 3233; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5] 3234; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 3235; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7] 3236; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] 3237; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6] 3238; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] 3239; X86-SSE-NEXT: punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4] 3240; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0] 3241; X86-SSE-NEXT: retl # encoding: [0xc3] 3242; 3243; X86-AVX1-LABEL: test_mm_set_epi16: 3244; X86-AVX1: # %bb.0: 3245; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3246; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3247; 
X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3248; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 3249; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3250; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 3251; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3252; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 3253; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3254; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 3255; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3256; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 3257; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3258; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 3259; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3260; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 3261; X86-AVX1-NEXT: retl # encoding: [0xc3] 3262; 3263; X86-AVX512-LABEL: test_mm_set_epi16: 3264; X86-AVX512: # %bb.0: 3265; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] 3266; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3267; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] 3268; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 3269; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] 3270; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 3271; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] 3272; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] 3273; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3274; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 3275; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] 3276; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 3277; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3278; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 3279; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 3280; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 3281; X86-AVX512-NEXT: retl # encoding: [0xc3] 3282; 3283; X64-SSE-LABEL: test_mm_set_epi16: 3284; X64-SSE: # %bb.0: 3285; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] 3286; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] 3287; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 3288; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] 3289; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: 
[0x66,0x0f,0x61,0xc8] 3290; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3291; X64-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2] 3292; X64-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1] 3293; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] 3294; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 3295; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] 3296; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 3297; X64-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0] 3298; X64-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9] 3299; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] 3300; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3301; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] 3302; X64-SSE-NEXT: movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2] 3303; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] 3304; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 3305; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] 3306; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3307; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] 3308; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 3309; X64-SSE-NEXT: retq # encoding: [0xc3] 3310; 3311; X64-AVX1-LABEL: test_mm_set_epi16: 3312; X64-AVX1: # %bb.0: 3313; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3314; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] 3315; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] 3316; X64-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] 3317; X64-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] 3318; X64-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] 3319; X64-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 3320; X64-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05] 3321; X64-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06] 3322; X64-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 3323; X64-AVX1-NEXT: retq # encoding: [0xc3] 3324; 3325; X64-AVX512-LABEL: test_mm_set_epi16: 3326; X64-AVX512: # %bb.0: 3327; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] 3328; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] 3329; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] 3330; X64-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] 3331; X64-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] 3332; X64-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] 3333; X64-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] 3334; X64-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05] 3335; X64-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06] 3336; X64-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] 3337; X64-AVX512-NEXT: retq # encoding: [0xc3] 3338 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 3339 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 3340 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2 3341 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3 3342 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4 3343 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5 3344 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6 3345 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 3346 %res = bitcast <8 x i16> %res7 to <2 x i64> 3347 ret <2 x i64> %res 3348} 3349 3350define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { 3351; X86-SSE-LABEL: test_mm_set_epi32: 3352; X86-SSE: # %bb.0: 3353; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 3354; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3355; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08] 3356; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 3357; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 3358; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3359; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x0c] 3360; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 3361; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 3362; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3363; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 3364; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3365; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 3366; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3367; X86-SSE-NEXT: retl # encoding: [0xc3] 3368; 3369; X86-AVX1-LABEL: test_mm_set_epi32: 3370; X86-AVX1: # %bb.0: 3371; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10] 3372; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 3373; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01] 3374; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02] 3375; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03] 3376; X86-AVX1-NEXT: retl # encoding: [0xc3] 3377; 3378; X86-AVX512-LABEL: test_mm_set_epi32: 3379; X86-AVX512: # %bb.0: 3380; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10] 3381; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 3382; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01] 3383; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02] 3384; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03] 3385; X86-AVX512-NEXT: retl # encoding: [0xc3] 3386; 3387; X64-SSE-LABEL: test_mm_set_epi32: 3388; X64-SSE: # %bb.0: 3389; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] 3390; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] 3391; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # 
encoding: [0x66,0x0f,0x62,0xc8] 3392; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3393; X64-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2] 3394; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] 3395; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] 3396; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3397; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3398; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3399; X64-SSE-NEXT: retq # encoding: [0xc3] 3400; 3401; X64-AVX1-LABEL: test_mm_set_epi32: 3402; X64-AVX1: # %bb.0: 3403; X64-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] 3404; X64-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] 3405; X64-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] 3406; X64-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] 3407; X64-AVX1-NEXT: retq # encoding: [0xc3] 3408; 3409; X64-AVX512-LABEL: test_mm_set_epi32: 3410; X64-AVX512: # %bb.0: 3411; X64-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] 3412; X64-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] 3413; X64-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] 3414; X64-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] 3415; X64-AVX512-NEXT: retq # encoding: [0xc3] 3416 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0 3417 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1 3418 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2 3419 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3 3420 %res = bitcast <4 x i32> %res3 to <2 x i64> 3421 ret <2 x i64> %res 3422} 3423 3424; TODO test_mm_set_epi64 3425 3426define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind { 3427; X86-SSE-LABEL: test_mm_set_epi64x: 3428; X86-SSE: # %bb.0: 3429; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] 3430; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 3431; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08] 3432; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3433; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 3434; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3435; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x0c] 3436; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 3437; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x10] 3438; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 3439; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 3440; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3441; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 3442; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3443; X86-SSE-NEXT: retl # encoding: [0xc3] 3444; 3445; X86-AVX1-LABEL: test_mm_set_epi64x: 3446; X86-AVX1: # %bb.0: 3447; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c] 3448; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 3449; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01] 3450; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02] 
3451; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03] 3452; X86-AVX1-NEXT: retl # encoding: [0xc3] 3453; 3454; X86-AVX512-LABEL: test_mm_set_epi64x: 3455; X86-AVX512: # %bb.0: 3456; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c] 3457; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 3458; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01] 3459; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02] 3460; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03] 3461; X86-AVX512-NEXT: retl # encoding: [0xc3] 3462; 3463; X64-SSE-LABEL: test_mm_set_epi64x: 3464; X64-SSE: # %bb.0: 3465; X64-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf] 3466; X64-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6] 3467; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] 3468; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3469; X64-SSE-NEXT: retq # encoding: [0xc3] 3470; 3471; X64-AVX1-LABEL: test_mm_set_epi64x: 3472; X64-AVX1: # %bb.0: 3473; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] 3474; X64-AVX1-NEXT: vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce] 3475; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] 3476; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] 3477; X64-AVX1-NEXT: retq # encoding: [0xc3] 3478; 3479; X64-AVX512-LABEL: test_mm_set_epi64x: 3480; X64-AVX512: # %bb.0: 3481; X64-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] 3482; X64-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce] 3483; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] 3484; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] 3485; X64-AVX512-NEXT: retq # encoding: [0xc3] 3486 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0 3487 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 3488 ret <2 x i64> %res1 3489} 3490 3491define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind { 3492; X86-SSE-LABEL: test_mm_set_pd: 3493; X86-SSE: # %bb.0: 3494; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c] 3495; X86-SSE-NEXT: # xmm0 = mem[0],zero 3496; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x04] 3497; X86-SSE-NEXT: # xmm1 = mem[0],zero 3498; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 3499; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 3500; X86-SSE-NEXT: retl # encoding: [0xc3] 3501; 3502; X86-AVX1-LABEL: test_mm_set_pd: 3503; X86-AVX1: # %bb.0: 3504; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] 3505; X86-AVX1-NEXT: # xmm0 = mem[0],zero 3506; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] 3507; X86-AVX1-NEXT: # xmm1 = mem[0],zero 3508; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 3509; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 3510; X86-AVX1-NEXT: retl # encoding: [0xc3] 3511; 3512; X86-AVX512-LABEL: test_mm_set_pd: 3513; X86-AVX512: # %bb.0: 3514; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # 
EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero
; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x double> undef, double %a1, i32 0
  %res1 = insertelement <2 x double> %res0, double %a0, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_set_pd1(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_pd1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_pd1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_pd1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_pd1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_pd1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_pd1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x double> undef, double %a0, i32 0
  %res1 = insertelement <2 x double> %res0, double %a0, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_set_sd(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_sd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x7e,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0],zero
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set_sd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0],zero
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set_sd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovq {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm0[0],zero
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set_sd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0],zero
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set_sd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],zero
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_sd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],zero
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x double> undef, double %a0, i32 0
  %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi8:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X86-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi8:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi8:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi8:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi8:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
  %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
  %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
  %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
  %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
  %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
  %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
  %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
  %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
  %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
  %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
  %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
  %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
  %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
  %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
  %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
  %res = bitcast <16 x i8> %res15 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X86-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpbroadcastw %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7]
; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
  %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
  %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
  %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
  %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
  %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
  %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
  %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
  %res = bitcast <8 x i16> %res7 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpbroadcastd %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
  %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
  %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

; TODO test_mm_set1_epi64
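; A hedged sketch for the TODO above, kept as a comment so the autogenerated
; assertions are unaffected: it mirrors test_mm_set1_epi64x below, but takes
; the __m64 argument that _mm_set1_epi64 expects. The <1 x i64> parameter
; type is an assumption about how clang would lower __m64 here, and CHECK
; lines would have to be regenerated with utils/update_llc_test_checks.py:
;
; define <2 x i64> @test_mm_set1_epi64(<1 x i64> %a0) nounwind {
;   %ext = extractelement <1 x i64> %a0, i32 0
;   %res0 = insertelement <2 x i64> undef, i64 %ext, i32 0
;   %res1 = insertelement <2 x i64> %res0, i64 %ext, i32 1
;   ret <2 x i64> %res1
; }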

define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_epi64x:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm1 # encoding: [0x66,0x0f,0x6e,0x4c,0x24,0x08]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
; X86-SSE-NEXT: # xmm0 = xmm0[0,1,0,1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_epi64x:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; X86-AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_epi64x:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT: vpbroadcastq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_epi64x:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44]
; X64-SSE-NEXT: # xmm0 = xmm0[0,1,0,1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_epi64x:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_epi64x:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X86-SSE-NEXT: # xmm0 = xmm0[0,0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_set1_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm0[0,0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_set1_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_set1_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0]
; X64-SSE-NEXT: # xmm0 = xmm0[0,0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_set1_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set1_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm0[0,0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x double> undef, double %a0, i32 0
  %res1 = insertelement <2 x double> %res0, double %a0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X86-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X86-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X86-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X86-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X86-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi8:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi8:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14]
; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c]
; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24]
; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c]
; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34]
; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi8:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda]
; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi8:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
; X64-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi8:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
; X64-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
; X64-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18]
; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20]
; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28]
; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30]
; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
  %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
  %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
  %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
  %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
  %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
  %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
  %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
  %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
  %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
  %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
  %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
  %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
  %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
  %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
  %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
  %res = bitcast <16 x i8> %res15 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8]
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0]
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8]
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0]
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8]
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0]
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8]
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1]
; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X86-SSE-NEXT: punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3]
; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; X86-SSE-NEXT: punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2]
; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5]
; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
; X86-SSE-NEXT: punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c]
; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14]
; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18]
; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c]
; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20]
; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10]
; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08]
; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0]
; X64-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca]
; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1]
; X64-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0]
; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1]
; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde]
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
; X64-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
; X64-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
; X64-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
; X64-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
; X64-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-AVX1-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10]
; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08]
; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01]
; X64-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02]
; X64-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
; X64-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04]
; X64-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05]
; X64-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
  %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
  %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
  %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
  %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
  %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
  %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
  %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
  %res = bitcast <8 x i16> %res7 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1]
; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca]
; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8]
; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6]
; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
; X64-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; X64-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01]
; X64-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; X64-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
  %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
  %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

; TODO test_mm_setr_epi64
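; A possible body for the TODO above, again only a hedged sketch kept in a
; comment: it mirrors test_mm_setr_epi64x below with two __m64 arguments,
; and it assumes clang would lower __m64 to <1 x i64>; the assertions would
; need regenerating with utils/update_llc_test_checks.py before enabling it:
;
; define <2 x i64> @test_mm_setr_epi64(<1 x i64> %a0, <1 x i64> %a1) nounwind {
;   %ext0 = extractelement <1 x i64> %a0, i32 0
;   %res0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
;   %ext1 = extractelement <1 x i64> %a1, i32 0
;   %res1 = insertelement <2 x i64> %res0, i64 %ext1, i32 1
;   ret <2 x i64> %res1
; }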

define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X86-SSE-LABEL: test_mm_setr_epi64x:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8]
; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08]
; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_epi64x:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_epi64x:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01]
; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02]
; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_epi64x:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce]
; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7]
; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_epi64x:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
; X64-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_epi64x:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6]
; X64-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf]
; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0]
; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
; X86-SSE-LABEL: test_mm_setr_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x0c]
; X86-SSE-NEXT: # xmm1 = mem[0],zero
; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04]
; X86-SSE-NEXT: # xmm0 = mem[0],zero
; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_setr_pd:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX1-NEXT: # xmm0 = mem[0],zero
; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX1-NEXT: # xmm1 = mem[0],zero
; X86-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0]
; X86-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_setr_pd:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c]
; X86-AVX512-NEXT: # xmm0 = mem[0],zero
; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04]
; X86-AVX512-NEXT: # xmm1 = mem[0],zero
; X86-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0]
; X86-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setr_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_setr_pd:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_setr_pd:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res0 = insertelement <2 x double> undef, double %a0, i32 0
  %res1 = insertelement <2 x double> %res0, double %a1, i32 1
  ret <2 x double> %res1
}
4554 4555define <2 x double> @test_mm_setzero_pd() { 4556; SSE-LABEL: test_mm_setzero_pd: 4557; SSE: # %bb.0: 4558; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 4559; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4560; 4561; AVX1-LABEL: test_mm_setzero_pd: 4562; AVX1: # %bb.0: 4563; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 4564; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4565; 4566; AVX512-LABEL: test_mm_setzero_pd: 4567; AVX512: # %bb.0: 4568; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 4569; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4570 ret <2 x double> zeroinitializer 4571} 4572 4573define <2 x i64> @test_mm_setzero_si128() { 4574; SSE-LABEL: test_mm_setzero_si128: 4575; SSE: # %bb.0: 4576; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 4577; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4578; 4579; AVX1-LABEL: test_mm_setzero_si128: 4580; AVX1: # %bb.0: 4581; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 4582; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4583; 4584; AVX512-LABEL: test_mm_setzero_si128: 4585; AVX512: # %bb.0: 4586; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 4587; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4588 ret <2 x i64> zeroinitializer 4589} 4590 4591define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) { 4592; SSE-LABEL: test_mm_shuffle_epi32: 4593; SSE: # %bb.0: 4594; SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] 4595; SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 4596; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4597; 4598; AVX1-LABEL: test_mm_shuffle_epi32: 4599; AVX1: # %bb.0: 4600; AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 4601; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 4602; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4603; 4604; AVX512-LABEL: test_mm_shuffle_epi32: 4605; AVX512: # %bb.0: 4606; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 4607; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4608 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4609 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer 4610 %bc = bitcast <4 x i32> %res to <2 x i64> 4611 ret <2 x i64> %bc 4612} 4613 4614define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { 4615; SSE-LABEL: test_mm_shuffle_pd: 4616; SSE: # %bb.0: 4617; SSE-NEXT: shufpd $1, %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc1,0x01] 4618; SSE-NEXT: # xmm0 = xmm0[1],xmm1[0] 4619; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4620; 4621; AVX1-LABEL: test_mm_shuffle_pd: 4622; AVX1: # %bb.0: 4623; AVX1-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc1,0x01] 4624; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[0] 4625; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4626; 4627; AVX512-LABEL: test_mm_shuffle_pd: 4628; AVX512: # %bb.0: 4629; AVX512-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01] 4630; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[0] 4631; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4632 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 4633 ret <2 x double> %res 4634} 4635 4636define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) { 4637; SSE-LABEL: test_mm_shufflehi_epi16: 4638; SSE: # %bb.0: 4639; SSE-NEXT: pshufhw $0, %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x70,0xc0,0x00] 4640; SSE-NEXT: # xmm0 = 
xmm0[0,1,2,3,4,4,4,4] 4641; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4642; 4643; AVX1-LABEL: test_mm_shufflehi_epi16: 4644; AVX1: # %bb.0: 4645; AVX1-NEXT: vpshufhw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x70,0xc0,0x00] 4646; AVX1-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] 4647; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4648; 4649; AVX512-LABEL: test_mm_shufflehi_epi16: 4650; AVX512: # %bb.0: 4651; AVX512-NEXT: vpshufhw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x00] 4652; AVX512-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] 4653; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4654 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4655 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> 4656 %bc = bitcast <8 x i16> %res to <2 x i64> 4657 ret <2 x i64> %bc 4658} 4659 4660define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) { 4661; SSE-LABEL: test_mm_shufflelo_epi16: 4662; SSE: # %bb.0: 4663; SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] 4664; SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4665; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4666; 4667; AVX1-LABEL: test_mm_shufflelo_epi16: 4668; AVX1: # %bb.0: 4669; AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00] 4670; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4671; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4672; 4673; AVX512-LABEL: test_mm_shufflelo_epi16: 4674; AVX512: # %bb.0: 4675; AVX512-NEXT: vpshuflw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x00] 4676; AVX512-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] 4677; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4678 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4679 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 4680 %bc = bitcast <8 x i16> %res to <2 x i64> 4681 ret <2 x i64> %bc 4682} 4683 4684define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) { 4685; SSE-LABEL: test_mm_sll_epi16: 4686; SSE: # %bb.0: 4687; SSE-NEXT: psllw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf1,0xc1] 4688; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4689; 4690; AVX1-LABEL: test_mm_sll_epi16: 4691; AVX1: # %bb.0: 4692; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf1,0xc1] 4693; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4694; 4695; AVX512-LABEL: test_mm_sll_epi16: 4696; AVX512: # %bb.0: 4697; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1] 4698; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4699 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4700 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 4701 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1) 4702 %bc = bitcast <8 x i16> %res to <2 x i64> 4703 ret <2 x i64> %bc 4704} 4705declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 4706 4707define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) { 4708; SSE-LABEL: test_mm_sll_epi32: 4709; SSE: # %bb.0: 4710; SSE-NEXT: pslld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf2,0xc1] 4711; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4712; 4713; AVX1-LABEL: test_mm_sll_epi32: 4714; AVX1: # %bb.0: 4715; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf2,0xc1] 4716; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4717; 4718; AVX512-LABEL: test_mm_sll_epi32: 4719; AVX512: # %bb.0: 4720; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xf2,0xc1] 4721; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4722 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4723 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 4724 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1) 4725 %bc = bitcast <4 x i32> %res to <2 x i64> 4726 ret <2 x i64> %bc 4727} 4728declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone 4729 4730define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) { 4731; SSE-LABEL: test_mm_sll_epi64: 4732; SSE: # %bb.0: 4733; SSE-NEXT: psllq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf3,0xc1] 4734; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4735; 4736; AVX1-LABEL: test_mm_sll_epi64: 4737; AVX1: # %bb.0: 4738; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf3,0xc1] 4739; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4740; 4741; AVX512-LABEL: test_mm_sll_epi64: 4742; AVX512: # %bb.0: 4743; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1] 4744; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4745 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) 4746 ret <2 x i64> %res 4747} 4748declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone 4749 4750define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) { 4751; SSE-LABEL: test_mm_slli_epi16: 4752; SSE: # %bb.0: 4753; SSE-NEXT: psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01] 4754; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4755; 4756; AVX1-LABEL: test_mm_slli_epi16: 4757; AVX1: # %bb.0: 4758; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01] 4759; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4760; 4761; AVX512-LABEL: test_mm_slli_epi16: 4762; AVX512: # %bb.0: 4763; AVX512-NEXT: vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01] 4764; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4765 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4766 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1) 4767 %bc = bitcast <8 x i16> %res to <2 x i64> 4768 ret <2 x i64> %bc 4769} 4770declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone 4771 4772define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) { 4773; SSE-LABEL: test_mm_slli_epi32: 4774; SSE: # %bb.0: 4775; SSE-NEXT: pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01] 4776; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4777; 4778; AVX1-LABEL: test_mm_slli_epi32: 4779; AVX1: # %bb.0: 4780; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01] 4781; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4782; 4783; AVX512-LABEL: test_mm_slli_epi32: 4784; AVX512: # %bb.0: 4785; AVX512-NEXT: vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01] 4786; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4787 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4788 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1) 4789 %bc = bitcast <4 x i32> %res to <2 x i64> 4790 ret <2 x i64> %bc 4791} 4792declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone 4793 4794define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) { 4795; SSE-LABEL: test_mm_slli_epi64: 4796; SSE: # %bb.0: 4797; SSE-NEXT: psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01] 4798; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4799; 4800; AVX1-LABEL: test_mm_slli_epi64: 4801; AVX1: # %bb.0: 4802; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01] 4803; AVX1-NEXT: ret{{[l|q]}} 
# encoding: [0xc3] 4804; 4805; AVX512-LABEL: test_mm_slli_epi64: 4806; AVX512: # %bb.0: 4807; AVX512-NEXT: vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01] 4808; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4809 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1) 4810 ret <2 x i64> %res 4811} 4812declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone 4813 4814define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind { 4815; SSE-LABEL: test_mm_slli_si128: 4816; SSE: # %bb.0: 4817; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05] 4818; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4819; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4820; 4821; AVX1-LABEL: test_mm_slli_si128: 4822; AVX1: # %bb.0: 4823; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05] 4824; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4825; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4826; 4827; AVX512-LABEL: test_mm_slli_si128: 4828; AVX512: # %bb.0: 4829; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05] 4830; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 4831; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4832 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 4833 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 4834 %bc = bitcast <16 x i8> %res to <2 x i64> 4835 ret <2 x i64> %bc 4836} 4837 4838define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind { 4839; SSE-LABEL: test_mm_sqrt_pd: 4840; SSE: # %bb.0: 4841; SSE-NEXT: sqrtpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x51,0xc0] 4842; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4843; 4844; AVX1-LABEL: test_mm_sqrt_pd: 4845; AVX1: # %bb.0: 4846; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x51,0xc0] 4847; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4848; 4849; AVX512-LABEL: test_mm_sqrt_pd: 4850; AVX512: # %bb.0: 4851; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0] 4852; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4853 %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0) 4854 ret <2 x double> %res 4855} 4856declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone 4857 4858define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 4859; SSE-LABEL: test_mm_sqrt_sd: 4860; SSE: # %bb.0: 4861; SSE-NEXT: sqrtsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0x51,0xc8] 4862; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1] 4863; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4864; 4865; AVX1-LABEL: test_mm_sqrt_sd: 4866; AVX1: # %bb.0: 4867; AVX1-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf3,0x51,0xc0] 4868; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4869; 4870; AVX512-LABEL: test_mm_sqrt_sd: 4871; AVX512: # %bb.0: 4872; AVX512-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf3,0x51,0xc0] 4873; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4874 %ext = extractelement <2 x double> %a0, i32 0 4875 %sqrt = call double @llvm.sqrt.f64(double %ext) 4876 %ins = insertelement <2 x double> %a1, double %sqrt, i32 0 4877 ret <2 x double> %ins 4878} 4879declare double @llvm.sqrt.f64(double) nounwind readnone 4880 4881; This doesn't match a clang test, but helps 
with fast-isel coverage. 4882define double @test_mm_sqrt_sd_scalar(double %a0) nounwind { 4883; X86-SSE-LABEL: test_mm_sqrt_sd_scalar: 4884; X86-SSE: # %bb.0: 4885; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 4886; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 4887; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 4888; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 4889; X86-SSE-NEXT: movsd 8(%ebp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x45,0x08] 4890; X86-SSE-NEXT: # xmm0 = mem[0],zero 4891; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] 4892; X86-SSE-NEXT: movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24] 4893; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 4894; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 4895; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 4896; X86-SSE-NEXT: retl # encoding: [0xc3] 4897; 4898; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar: 4899; X86-AVX1: # %bb.0: 4900; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55] 4901; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 4902; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 4903; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 4904; X86-AVX1-NEXT: vmovsd 8(%ebp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x45,0x08] 4905; X86-AVX1-NEXT: # xmm0 = mem[0],zero 4906; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] 4907; X86-AVX1-NEXT: vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24] 4908; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 4909; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 4910; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d] 4911; X86-AVX1-NEXT: retl # encoding: [0xc3] 4912; 4913; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar: 4914; X86-AVX512: # %bb.0: 4915; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55] 4916; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 4917; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] 4918; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] 4919; X86-AVX512-NEXT: vmovsd 8(%ebp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08] 4920; X86-AVX512-NEXT: # xmm0 = mem[0],zero 4921; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 4922; X86-AVX512-NEXT: vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24] 4923; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] 4924; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 4925; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d] 4926; X86-AVX512-NEXT: retl # encoding: [0xc3] 4927; 4928; X64-SSE-LABEL: test_mm_sqrt_sd_scalar: 4929; X64-SSE: # %bb.0: 4930; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] 4931; X64-SSE-NEXT: retq # encoding: [0xc3] 4932; 4933; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar: 4934; X64-AVX1: # %bb.0: 4935; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] 4936; X64-AVX1-NEXT: retq # encoding: [0xc3] 4937; 4938; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar: 4939; X64-AVX512: # %bb.0: 4940; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] 4941; X64-AVX512-NEXT: retq # encoding: [0xc3] 4942 %sqrt = call double @llvm.sqrt.f64(double %a0) 4943 ret double %sqrt 4944} 4945 4946define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) { 4947; SSE-LABEL: test_mm_sra_epi16: 4948; SSE: # %bb.0: 4949; SSE-NEXT: psraw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe1,0xc1] 4950; 
SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4951; 4952; AVX1-LABEL: test_mm_sra_epi16: 4953; AVX1: # %bb.0: 4954; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe1,0xc1] 4955; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4956; 4957; AVX512-LABEL: test_mm_sra_epi16: 4958; AVX512: # %bb.0: 4959; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1] 4960; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4961 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 4962 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 4963 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1) 4964 %bc = bitcast <8 x i16> %res to <2 x i64> 4965 ret <2 x i64> %bc 4966} 4967declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone 4968 4969define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) { 4970; SSE-LABEL: test_mm_sra_epi32: 4971; SSE: # %bb.0: 4972; SSE-NEXT: psrad %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe2,0xc1] 4973; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4974; 4975; AVX1-LABEL: test_mm_sra_epi32: 4976; AVX1: # %bb.0: 4977; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe2,0xc1] 4978; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4979; 4980; AVX512-LABEL: test_mm_sra_epi32: 4981; AVX512: # %bb.0: 4982; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1] 4983; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4984 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 4985 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 4986 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1) 4987 %bc = bitcast <4 x i32> %res to <2 x i64> 4988 ret <2 x i64> %bc 4989} 4990declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone 4991 4992define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) { 4993; SSE-LABEL: test_mm_srai_epi16: 4994; SSE: # %bb.0: 4995; SSE-NEXT: psraw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xe0,0x01] 4996; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4997; 4998; AVX1-LABEL: test_mm_srai_epi16: 4999; AVX1: # %bb.0: 5000; AVX1-NEXT: vpsraw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xe0,0x01] 5001; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5002; 5003; AVX512-LABEL: test_mm_srai_epi16: 5004; AVX512: # %bb.0: 5005; AVX512-NEXT: vpsraw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x01] 5006; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5007 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5008 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1) 5009 %bc = bitcast <8 x i16> %res to <2 x i64> 5010 ret <2 x i64> %bc 5011} 5012declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone 5013 5014define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) { 5015; SSE-LABEL: test_mm_srai_epi32: 5016; SSE: # %bb.0: 5017; SSE-NEXT: psrad $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xe0,0x01] 5018; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5019; 5020; AVX1-LABEL: test_mm_srai_epi32: 5021; AVX1: # %bb.0: 5022; AVX1-NEXT: vpsrad $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xe0,0x01] 5023; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5024; 5025; AVX512-LABEL: test_mm_srai_epi32: 5026; AVX512: # %bb.0: 5027; AVX512-NEXT: vpsrad $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x01] 5028; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5029 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5030 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1) 5031 %bc = bitcast <4 x i32> %res to <2 
x i64> 5032 ret <2 x i64> %bc 5033} 5034declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone 5035 5036define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) { 5037; SSE-LABEL: test_mm_srl_epi16: 5038; SSE: # %bb.0: 5039; SSE-NEXT: psrlw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd1,0xc1] 5040; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5041; 5042; AVX1-LABEL: test_mm_srl_epi16: 5043; AVX1: # %bb.0: 5044; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd1,0xc1] 5045; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5046; 5047; AVX512-LABEL: test_mm_srl_epi16: 5048; AVX512: # %bb.0: 5049; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] 5050; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5051 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5052 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 5053 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1) 5054 %bc = bitcast <8 x i16> %res to <2 x i64> 5055 ret <2 x i64> %bc 5056} 5057declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone 5058 5059define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) { 5060; SSE-LABEL: test_mm_srl_epi32: 5061; SSE: # %bb.0: 5062; SSE-NEXT: psrld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd2,0xc1] 5063; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5064; 5065; AVX1-LABEL: test_mm_srl_epi32: 5066; AVX1: # %bb.0: 5067; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd2,0xc1] 5068; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5069; 5070; AVX512-LABEL: test_mm_srl_epi32: 5071; AVX512: # %bb.0: 5072; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1] 5073; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5074 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5075 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 5076 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1) 5077 %bc = bitcast <4 x i32> %res to <2 x i64> 5078 ret <2 x i64> %bc 5079} 5080declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone 5081 5082define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) { 5083; SSE-LABEL: test_mm_srl_epi64: 5084; SSE: # %bb.0: 5085; SSE-NEXT: psrlq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd3,0xc1] 5086; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5087; 5088; AVX1-LABEL: test_mm_srl_epi64: 5089; AVX1: # %bb.0: 5090; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd3,0xc1] 5091; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5092; 5093; AVX512-LABEL: test_mm_srl_epi64: 5094; AVX512: # %bb.0: 5095; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1] 5096; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5097 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) 5098 ret <2 x i64> %res 5099} 5100declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone 5101 5102define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) { 5103; SSE-LABEL: test_mm_srli_epi16: 5104; SSE: # %bb.0: 5105; SSE-NEXT: psrlw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xd0,0x01] 5106; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5107; 5108; AVX1-LABEL: test_mm_srli_epi16: 5109; AVX1: # %bb.0: 5110; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xd0,0x01] 5111; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5112; 5113; AVX512-LABEL: test_mm_srli_epi16: 5114; AVX512: # %bb.0: 5115; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x71,0xd0,0x01] 5116; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5117 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5118 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1) 5119 %bc = bitcast <8 x i16> %res to <2 x i64> 5120 ret <2 x i64> %bc 5121} 5122declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone 5123 5124define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) { 5125; SSE-LABEL: test_mm_srli_epi32: 5126; SSE: # %bb.0: 5127; SSE-NEXT: psrld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xd0,0x01] 5128; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5129; 5130; AVX1-LABEL: test_mm_srli_epi32: 5131; AVX1: # %bb.0: 5132; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xd0,0x01] 5133; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5134; 5135; AVX512-LABEL: test_mm_srli_epi32: 5136; AVX512: # %bb.0: 5137; AVX512-NEXT: vpsrld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x01] 5138; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5139 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5140 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1) 5141 %bc = bitcast <4 x i32> %res to <2 x i64> 5142 ret <2 x i64> %bc 5143} 5144declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone 5145 5146define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) { 5147; SSE-LABEL: test_mm_srli_epi64: 5148; SSE: # %bb.0: 5149; SSE-NEXT: psrlq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xd0,0x01] 5150; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5151; 5152; AVX1-LABEL: test_mm_srli_epi64: 5153; AVX1: # %bb.0: 5154; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd0,0x01] 5155; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5156; 5157; AVX512-LABEL: test_mm_srli_epi64: 5158; AVX512: # %bb.0: 5159; AVX512-NEXT: vpsrlq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x01] 5160; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5161 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1) 5162 ret <2 x i64> %res 5163} 5164declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone 5165 5166define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind { 5167; SSE-LABEL: test_mm_srli_si128: 5168; SSE: # %bb.0: 5169; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05] 5170; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5171; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5172; 5173; AVX1-LABEL: test_mm_srli_si128: 5174; AVX1: # %bb.0: 5175; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05] 5176; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5177; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5178; 5179; AVX512-LABEL: test_mm_srli_si128: 5180; AVX512: # %bb.0: 5181; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05] 5182; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 5183; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5184 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 5185 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 5186 %bc = bitcast <16 x i8> %res to <2 x i64> 5187 ret <2 x i64> %bc 5188} 5189 5190define void @test_mm_store_pd(double *%a0, <2 x double> %a1) { 5191; X86-SSE-LABEL: test_mm_store_pd: 5192; X86-SSE: # %bb.0: 5193; X86-SSE-NEXT: 
movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5194; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5195; X86-SSE-NEXT: retl # encoding: [0xc3] 5196; 5197; X86-AVX1-LABEL: test_mm_store_pd: 5198; X86-AVX1: # %bb.0: 5199; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5200; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 5201; X86-AVX1-NEXT: retl # encoding: [0xc3] 5202; 5203; X86-AVX512-LABEL: test_mm_store_pd: 5204; X86-AVX512: # %bb.0: 5205; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5206; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 5207; X86-AVX512-NEXT: retl # encoding: [0xc3] 5208; 5209; X64-SSE-LABEL: test_mm_store_pd: 5210; X64-SSE: # %bb.0: 5211; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5212; X64-SSE-NEXT: retq # encoding: [0xc3] 5213; 5214; X64-AVX1-LABEL: test_mm_store_pd: 5215; X64-AVX1: # %bb.0: 5216; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 5217; X64-AVX1-NEXT: retq # encoding: [0xc3] 5218; 5219; X64-AVX512-LABEL: test_mm_store_pd: 5220; X64-AVX512: # %bb.0: 5221; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 5222; X64-AVX512-NEXT: retq # encoding: [0xc3] 5223 %arg0 = bitcast double* %a0 to <2 x double>* 5224 store <2 x double> %a1, <2 x double>* %arg0, align 16 5225 ret void 5226} 5227 5228define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) { 5229; X86-SSE-LABEL: test_mm_store_pd1: 5230; X86-SSE: # %bb.0: 5231; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5232; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5233; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 5234; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5235; X86-SSE-NEXT: retl # encoding: [0xc3] 5236; 5237; X86-AVX1-LABEL: test_mm_store_pd1: 5238; X86-AVX1: # %bb.0: 5239; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5240; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5241; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] 5242; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] 5243; X86-AVX1-NEXT: retl # encoding: [0xc3] 5244; 5245; X86-AVX512-LABEL: test_mm_store_pd1: 5246; X86-AVX512: # %bb.0: 5247; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5248; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5249; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] 5250; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] 5251; X86-AVX512-NEXT: retl # encoding: [0xc3] 5252; 5253; X64-SSE-LABEL: test_mm_store_pd1: 5254; X64-SSE: # %bb.0: 5255; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5256; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 5257; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5258; X64-SSE-NEXT: retq # encoding: [0xc3] 5259; 5260; X64-AVX1-LABEL: test_mm_store_pd1: 5261; X64-AVX1: # %bb.0: 5262; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5263; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] 5264; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] 5265; X64-AVX1-NEXT: retq # encoding: [0xc3] 5266; 5267; X64-AVX512-LABEL: test_mm_store_pd1: 5268; X64-AVX512: # %bb.0: 5269; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfb,0x12,0xc0] 5270; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] 5271; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] 5272; X64-AVX512-NEXT: retq # encoding: [0xc3] 5273 %arg0 = bitcast double * %a0 to <2 x double>* 5274 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer 5275 store <2 x double> %shuf, <2 x double>* %arg0, align 16 5276 ret void 5277} 5278 5279define void @test_mm_store_sd(double *%a0, <2 x double> %a1) { 5280; X86-SSE-LABEL: test_mm_store_sd: 5281; X86-SSE: # %bb.0: 5282; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5283; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5284; X86-SSE-NEXT: retl # encoding: [0xc3] 5285; 5286; X86-AVX1-LABEL: test_mm_store_sd: 5287; X86-AVX1: # %bb.0: 5288; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5289; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] 5290; X86-AVX1-NEXT: retl # encoding: [0xc3] 5291; 5292; X86-AVX512-LABEL: test_mm_store_sd: 5293; X86-AVX512: # %bb.0: 5294; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5295; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5296; X86-AVX512-NEXT: retl # encoding: [0xc3] 5297; 5298; X64-SSE-LABEL: test_mm_store_sd: 5299; X64-SSE: # %bb.0: 5300; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5301; X64-SSE-NEXT: retq # encoding: [0xc3] 5302; 5303; X64-AVX1-LABEL: test_mm_store_sd: 5304; X64-AVX1: # %bb.0: 5305; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5306; X64-AVX1-NEXT: retq # encoding: [0xc3] 5307; 5308; X64-AVX512-LABEL: test_mm_store_sd: 5309; X64-AVX512: # %bb.0: 5310; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5311; X64-AVX512-NEXT: retq # encoding: [0xc3] 5312 %ext = extractelement <2 x double> %a1, i32 0 5313 store double %ext, double* %a0, align 1 5314 ret void 5315} 5316 5317define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5318; X86-SSE-LABEL: test_mm_store_si128: 5319; X86-SSE: # %bb.0: 5320; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5321; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5322; X86-SSE-NEXT: retl # encoding: [0xc3] 5323; 5324; X86-AVX1-LABEL: test_mm_store_si128: 5325; X86-AVX1: # %bb.0: 5326; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5327; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 5328; X86-AVX1-NEXT: retl # encoding: [0xc3] 5329; 5330; X86-AVX512-LABEL: test_mm_store_si128: 5331; X86-AVX512: # %bb.0: 5332; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5333; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 5334; X86-AVX512-NEXT: retl # encoding: [0xc3] 5335; 5336; X64-SSE-LABEL: test_mm_store_si128: 5337; X64-SSE: # %bb.0: 5338; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5339; X64-SSE-NEXT: retq # encoding: [0xc3] 5340; 5341; X64-AVX1-LABEL: test_mm_store_si128: 5342; X64-AVX1: # %bb.0: 5343; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 5344; X64-AVX1-NEXT: retq # encoding: [0xc3] 5345; 5346; X64-AVX512-LABEL: test_mm_store_si128: 5347; X64-AVX512: # %bb.0: 5348; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: 
[0xc5,0xf8,0x29,0x07] 5349; X64-AVX512-NEXT: retq # encoding: [0xc3] 5350 store <2 x i64> %a1, <2 x i64>* %a0, align 16 5351 ret void 5352} 5353 5354define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) { 5355; X86-SSE-LABEL: test_mm_store1_pd: 5356; X86-SSE: # %bb.0: 5357; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5358; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5359; X86-SSE-NEXT: # xmm0 = xmm0[0,0] 5360; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 5361; X86-SSE-NEXT: retl # encoding: [0xc3] 5362; 5363; X86-AVX1-LABEL: test_mm_store1_pd: 5364; X86-AVX1: # %bb.0: 5365; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5366; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5367; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] 5368; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] 5369; X86-AVX1-NEXT: retl # encoding: [0xc3] 5370; 5371; X86-AVX512-LABEL: test_mm_store1_pd: 5372; X86-AVX512: # %bb.0: 5373; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5374; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5375; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] 5376; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] 5377; X86-AVX512-NEXT: retl # encoding: [0xc3] 5378; 5379; X64-SSE-LABEL: test_mm_store1_pd: 5380; X64-SSE: # %bb.0: 5381; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] 5382; X64-SSE-NEXT: # xmm0 = xmm0[0,0] 5383; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 5384; X64-SSE-NEXT: retq # encoding: [0xc3] 5385; 5386; X64-AVX1-LABEL: test_mm_store1_pd: 5387; X64-AVX1: # %bb.0: 5388; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] 5389; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] 5390; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] 5391; X64-AVX1-NEXT: retq # encoding: [0xc3] 5392; 5393; X64-AVX512-LABEL: test_mm_store1_pd: 5394; X64-AVX512: # %bb.0: 5395; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] 5396; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] 5397; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] 5398; X64-AVX512-NEXT: retq # encoding: [0xc3] 5399 %arg0 = bitcast double * %a0 to <2 x double>* 5400 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer 5401 store <2 x double> %shuf, <2 x double>* %arg0, align 16 5402 ret void 5403} 5404 5405define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) { 5406; X86-SSE-LABEL: test_mm_storeh_sd: 5407; X86-SSE: # %bb.0: 5408; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5409; X86-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] 5410; X86-SSE-NEXT: # xmm0 = xmm0[1,1] 5411; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5412; X86-SSE-NEXT: retl # encoding: [0xc3] 5413; 5414; X86-AVX1-LABEL: test_mm_storeh_sd: 5415; X86-AVX1: # %bb.0: 5416; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5417; X86-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5418; X86-AVX1-NEXT: # xmm0 = xmm0[1,0] 5419; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] 5420; X86-AVX1-NEXT: retl # encoding: [0xc3] 5421; 5422; X86-AVX512-LABEL: test_mm_storeh_sd: 5423; X86-AVX512: # 
%bb.0: 5424; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5425; X86-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5426; X86-AVX512-NEXT: # xmm0 = xmm0[1,0] 5427; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5428; X86-AVX512-NEXT: retl # encoding: [0xc3] 5429; 5430; X64-SSE-LABEL: test_mm_storeh_sd: 5431; X64-SSE: # %bb.0: 5432; X64-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] 5433; X64-SSE-NEXT: # xmm0 = xmm0[1,1] 5434; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5435; X64-SSE-NEXT: retq # encoding: [0xc3] 5436; 5437; X64-AVX1-LABEL: test_mm_storeh_sd: 5438; X64-AVX1: # %bb.0: 5439; X64-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5440; X64-AVX1-NEXT: # xmm0 = xmm0[1,0] 5441; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5442; X64-AVX1-NEXT: retq # encoding: [0xc3] 5443; 5444; X64-AVX512-LABEL: test_mm_storeh_sd: 5445; X64-AVX512: # %bb.0: 5446; X64-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5447; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] 5448; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5449; X64-AVX512-NEXT: retq # encoding: [0xc3] 5450 %ext = extractelement <2 x double> %a1, i32 1 5451 store double %ext, double* %a0, align 8 5452 ret void 5453} 5454 5455define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) { 5456; X86-SSE-LABEL: test_mm_storel_epi64: 5457; X86-SSE: # %bb.0: 5458; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5459; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] 5460; X86-SSE-NEXT: retl # encoding: [0xc3] 5461; 5462; X86-AVX1-LABEL: test_mm_storel_epi64: 5463; X86-AVX1: # %bb.0: 5464; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5465; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 5466; X86-AVX1-NEXT: retl # encoding: [0xc3] 5467; 5468; X86-AVX512-LABEL: test_mm_storel_epi64: 5469; X86-AVX512: # %bb.0: 5470; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5471; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 5472; X86-AVX512-NEXT: retl # encoding: [0xc3] 5473; 5474; X64-SSE-LABEL: test_mm_storel_epi64: 5475; X64-SSE: # %bb.0: 5476; X64-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] 5477; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5478; X64-SSE-NEXT: retq # encoding: [0xc3] 5479; 5480; X64-AVX1-LABEL: test_mm_storel_epi64: 5481; X64-AVX1: # %bb.0: 5482; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 5483; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5484; X64-AVX1-NEXT: retq # encoding: [0xc3] 5485; 5486; X64-AVX512-LABEL: test_mm_storel_epi64: 5487; X64-AVX512: # %bb.0: 5488; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 5489; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 5490; X64-AVX512-NEXT: retq # encoding: [0xc3] 5491 %ext = extractelement <2 x i64> %a1, i32 0 5492 %bc = bitcast <2 x i64> *%a0 to i64* 5493 store i64 %ext, i64* %bc, align 8 5494 ret void 5495} 5496 5497define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) { 5498; X86-SSE-LABEL: test_mm_storel_sd: 5499; 
X86-SSE: # %bb.0: 5500; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5501; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] 5502; X86-SSE-NEXT: retl # encoding: [0xc3] 5503; 5504; X86-AVX1-LABEL: test_mm_storel_sd: 5505; X86-AVX1: # %bb.0: 5506; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5507; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] 5508; X86-AVX1-NEXT: retl # encoding: [0xc3] 5509; 5510; X86-AVX512-LABEL: test_mm_storel_sd: 5511; X86-AVX512: # %bb.0: 5512; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5513; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] 5514; X86-AVX512-NEXT: retl # encoding: [0xc3] 5515; 5516; X64-SSE-LABEL: test_mm_storel_sd: 5517; X64-SSE: # %bb.0: 5518; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] 5519; X64-SSE-NEXT: retq # encoding: [0xc3] 5520; 5521; X64-AVX1-LABEL: test_mm_storel_sd: 5522; X64-AVX1: # %bb.0: 5523; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] 5524; X64-AVX1-NEXT: retq # encoding: [0xc3] 5525; 5526; X64-AVX512-LABEL: test_mm_storel_sd: 5527; X64-AVX512: # %bb.0: 5528; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] 5529; X64-AVX512-NEXT: retq # encoding: [0xc3] 5530 %ext = extractelement <2 x double> %a1, i32 0 5531 store double %ext, double* %a0, align 8 5532 ret void 5533} 5534 5535define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) { 5536; X86-SSE-LABEL: test_mm_storer_pd: 5537; X86-SSE: # %bb.0: 5538; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5539; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] 5540; X86-SSE-NEXT: # xmm0 = xmm0[1,0] 5541; X86-SSE-NEXT: movapd %xmm0, (%eax) # encoding: [0x66,0x0f,0x29,0x00] 5542; X86-SSE-NEXT: retl # encoding: [0xc3] 5543; 5544; X86-AVX1-LABEL: test_mm_storer_pd: 5545; X86-AVX1: # %bb.0: 5546; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5547; X86-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5548; X86-AVX1-NEXT: # xmm0 = xmm0[1,0] 5549; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] 5550; X86-AVX1-NEXT: retl # encoding: [0xc3] 5551; 5552; X86-AVX512-LABEL: test_mm_storer_pd: 5553; X86-AVX512: # %bb.0: 5554; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5555; X86-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5556; X86-AVX512-NEXT: # xmm0 = xmm0[1,0] 5557; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] 5558; X86-AVX512-NEXT: retl # encoding: [0xc3] 5559; 5560; X64-SSE-LABEL: test_mm_storer_pd: 5561; X64-SSE: # %bb.0: 5562; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] 5563; X64-SSE-NEXT: # xmm0 = xmm0[1,0] 5564; X64-SSE-NEXT: movapd %xmm0, (%rdi) # encoding: [0x66,0x0f,0x29,0x07] 5565; X64-SSE-NEXT: retq # encoding: [0xc3] 5566; 5567; X64-AVX1-LABEL: test_mm_storer_pd: 5568; X64-AVX1: # %bb.0: 5569; X64-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5570; X64-AVX1-NEXT: # xmm0 = xmm0[1,0] 5571; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] 5572; X64-AVX1-NEXT: retq # encoding: [0xc3] 5573; 5574; X64-AVX512-LABEL: test_mm_storer_pd: 
5575; X64-AVX512: # %bb.0: 5576; X64-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] 5577; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] 5578; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] 5579; X64-AVX512-NEXT: retq # encoding: [0xc3] 5580 %arg0 = bitcast double* %a0 to <2 x double>* 5581 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0> 5582 store <2 x double> %shuf, <2 x double>* %arg0, align 16 5583 ret void 5584} 5585 5586define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) { 5587; X86-SSE-LABEL: test_mm_storeu_pd: 5588; X86-SSE: # %bb.0: 5589; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5590; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 5591; X86-SSE-NEXT: retl # encoding: [0xc3] 5592; 5593; X86-AVX1-LABEL: test_mm_storeu_pd: 5594; X86-AVX1: # %bb.0: 5595; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5596; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 5597; X86-AVX1-NEXT: retl # encoding: [0xc3] 5598; 5599; X86-AVX512-LABEL: test_mm_storeu_pd: 5600; X86-AVX512: # %bb.0: 5601; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5602; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 5603; X86-AVX512-NEXT: retl # encoding: [0xc3] 5604; 5605; X64-SSE-LABEL: test_mm_storeu_pd: 5606; X64-SSE: # %bb.0: 5607; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 5608; X64-SSE-NEXT: retq # encoding: [0xc3] 5609; 5610; X64-AVX1-LABEL: test_mm_storeu_pd: 5611; X64-AVX1: # %bb.0: 5612; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 5613; X64-AVX1-NEXT: retq # encoding: [0xc3] 5614; 5615; X64-AVX512-LABEL: test_mm_storeu_pd: 5616; X64-AVX512: # %bb.0: 5617; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 5618; X64-AVX512-NEXT: retq # encoding: [0xc3] 5619 %arg0 = bitcast double* %a0 to <2 x double>* 5620 store <2 x double> %a1, <2 x double>* %arg0, align 1 5621 ret void 5622} 5623 5624define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5625; X86-SSE-LABEL: test_mm_storeu_si128: 5626; X86-SSE: # %bb.0: 5627; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5628; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 5629; X86-SSE-NEXT: retl # encoding: [0xc3] 5630; 5631; X86-AVX1-LABEL: test_mm_storeu_si128: 5632; X86-AVX1: # %bb.0: 5633; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5634; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 5635; X86-AVX1-NEXT: retl # encoding: [0xc3] 5636; 5637; X86-AVX512-LABEL: test_mm_storeu_si128: 5638; X86-AVX512: # %bb.0: 5639; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5640; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 5641; X86-AVX512-NEXT: retl # encoding: [0xc3] 5642; 5643; X64-SSE-LABEL: test_mm_storeu_si128: 5644; X64-SSE: # %bb.0: 5645; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 5646; X64-SSE-NEXT: retq # encoding: [0xc3] 5647; 5648; X64-AVX1-LABEL: test_mm_storeu_si128: 5649; X64-AVX1: # %bb.0: 5650; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 5651; X64-AVX1-NEXT: retq # encoding: [0xc3] 5652; 5653; 
X64-AVX512-LABEL: test_mm_storeu_si128: 5654; X64-AVX512: # %bb.0: 5655; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 5656; X64-AVX512-NEXT: retq # encoding: [0xc3] 5657 store <2 x i64> %a1, <2 x i64>* %a0, align 1 5658 ret void 5659} 5660 5661define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) { 5662; X86-SSE-LABEL: test_mm_stream_pd: 5663; X86-SSE: # %bb.0: 5664; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5665; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 5666; X86-SSE-NEXT: retl # encoding: [0xc3] 5667; 5668; X86-AVX1-LABEL: test_mm_stream_pd: 5669; X86-AVX1: # %bb.0: 5670; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5671; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 5672; X86-AVX1-NEXT: retl # encoding: [0xc3] 5673; 5674; X86-AVX512-LABEL: test_mm_stream_pd: 5675; X86-AVX512: # %bb.0: 5676; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5677; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 5678; X86-AVX512-NEXT: retl # encoding: [0xc3] 5679; 5680; X64-SSE-LABEL: test_mm_stream_pd: 5681; X64-SSE: # %bb.0: 5682; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 5683; X64-SSE-NEXT: retq # encoding: [0xc3] 5684; 5685; X64-AVX1-LABEL: test_mm_stream_pd: 5686; X64-AVX1: # %bb.0: 5687; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 5688; X64-AVX1-NEXT: retq # encoding: [0xc3] 5689; 5690; X64-AVX512-LABEL: test_mm_stream_pd: 5691; X64-AVX512: # %bb.0: 5692; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 5693; X64-AVX512-NEXT: retq # encoding: [0xc3] 5694 %arg0 = bitcast double* %a0 to <2 x double>* 5695 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0 5696 ret void 5697} 5698 5699define void @test_mm_stream_si32(i32 *%a0, i32 %a1) { 5700; X86-LABEL: test_mm_stream_si32: 5701; X86: # %bb.0: 5702; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 5703; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 5704; X86-NEXT: movntil %eax, (%ecx) # encoding: [0x0f,0xc3,0x01] 5705; X86-NEXT: retl # encoding: [0xc3] 5706; 5707; X64-LABEL: test_mm_stream_si32: 5708; X64: # %bb.0: 5709; X64-NEXT: movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37] 5710; X64-NEXT: retq # encoding: [0xc3] 5711 store i32 %a1, i32* %a0, align 1, !nontemporal !0 5712 ret void 5713} 5714 5715define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) { 5716; X86-SSE-LABEL: test_mm_stream_si128: 5717; X86-SSE: # %bb.0: 5718; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5719; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 5720; X86-SSE-NEXT: retl # encoding: [0xc3] 5721; 5722; X86-AVX1-LABEL: test_mm_stream_si128: 5723; X86-AVX1: # %bb.0: 5724; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5725; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 5726; X86-AVX1-NEXT: retl # encoding: [0xc3] 5727; 5728; X86-AVX512-LABEL: test_mm_stream_si128: 5729; X86-AVX512: # %bb.0: 5730; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 5731; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 5732; X86-AVX512-NEXT: retl # encoding: [0xc3] 5733; 5734; X64-SSE-LABEL: 
test_mm_stream_si128: 5735; X64-SSE: # %bb.0: 5736; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 5737; X64-SSE-NEXT: retq # encoding: [0xc3] 5738; 5739; X64-AVX1-LABEL: test_mm_stream_si128: 5740; X64-AVX1: # %bb.0: 5741; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 5742; X64-AVX1-NEXT: retq # encoding: [0xc3] 5743; 5744; X64-AVX512-LABEL: test_mm_stream_si128: 5745; X64-AVX512: # %bb.0: 5746; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 5747; X64-AVX512-NEXT: retq # encoding: [0xc3] 5748 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0 5749 ret void 5750} 5751 5752define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 5753; SSE-LABEL: test_mm_sub_epi8: 5754; SSE: # %bb.0: 5755; SSE-NEXT: psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1] 5756; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5757; 5758; AVX1-LABEL: test_mm_sub_epi8: 5759; AVX1: # %bb.0: 5760; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1] 5761; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5762; 5763; AVX512-LABEL: test_mm_sub_epi8: 5764; AVX512: # %bb.0: 5765; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1] 5766; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5767 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 5768 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 5769 %res = sub <16 x i8> %arg0, %arg1 5770 %bc = bitcast <16 x i8> %res to <2 x i64> 5771 ret <2 x i64> %bc 5772} 5773 5774define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 5775; SSE-LABEL: test_mm_sub_epi16: 5776; SSE: # %bb.0: 5777; SSE-NEXT: psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1] 5778; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5779; 5780; AVX1-LABEL: test_mm_sub_epi16: 5781; AVX1: # %bb.0: 5782; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1] 5783; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5784; 5785; AVX512-LABEL: test_mm_sub_epi16: 5786; AVX512: # %bb.0: 5787; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1] 5788; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5789 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 5790 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 5791 %res = sub <8 x i16> %arg0, %arg1 5792 %bc = bitcast <8 x i16> %res to <2 x i64> 5793 ret <2 x i64> %bc 5794} 5795 5796define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind { 5797; SSE-LABEL: test_mm_sub_epi32: 5798; SSE: # %bb.0: 5799; SSE-NEXT: psubd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfa,0xc1] 5800; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5801; 5802; AVX1-LABEL: test_mm_sub_epi32: 5803; AVX1: # %bb.0: 5804; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfa,0xc1] 5805; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5806; 5807; AVX512-LABEL: test_mm_sub_epi32: 5808; AVX512: # %bb.0: 5809; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1] 5810; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5811 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 5812 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 5813 %res = sub <4 x i32> %arg0, %arg1 5814 %bc = bitcast <4 x i32> %res to <2 x i64> 5815 ret <2 x i64> %bc 5816} 5817 5818define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind { 5819; SSE-LABEL: test_mm_sub_epi64: 5820; SSE: # %bb.0: 5821; SSE-NEXT: psubq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfb,0xc1] 5822; SSE-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] 5823; 5824; AVX1-LABEL: test_mm_sub_epi64: 5825; AVX1: # %bb.0: 5826; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1] 5827; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5828; 5829; AVX512-LABEL: test_mm_sub_epi64: 5830; AVX512: # %bb.0: 5831; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1] 5832; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5833 %res = sub <2 x i64> %a0, %a1 5834 ret <2 x i64> %res 5835} 5836 5837define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind { 5838; SSE-LABEL: test_mm_sub_pd: 5839; SSE: # %bb.0: 5840; SSE-NEXT: subpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5c,0xc1] 5841; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5842; 5843; AVX1-LABEL: test_mm_sub_pd: 5844; AVX1: # %bb.0: 5845; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5c,0xc1] 5846; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5847; 5848; AVX512-LABEL: test_mm_sub_pd: 5849; AVX512: # %bb.0: 5850; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5c,0xc1] 5851; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5852 %res = fsub <2 x double> %a0, %a1 5853 ret <2 x double> %res 5854} 5855 5856define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 5857; SSE-LABEL: test_mm_sub_sd: 5858; SSE: # %bb.0: 5859; SSE-NEXT: subsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5c,0xc1] 5860; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5861; 5862; AVX1-LABEL: test_mm_sub_sd: 5863; AVX1: # %bb.0: 5864; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5c,0xc1] 5865; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5866; 5867; AVX512-LABEL: test_mm_sub_sd: 5868; AVX512: # %bb.0: 5869; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1] 5870; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5871 %ext0 = extractelement <2 x double> %a0, i32 0 5872 %ext1 = extractelement <2 x double> %a1, i32 0 5873 %fsub = fsub double %ext0, %ext1 5874 %res = insertelement <2 x double> %a0, double %fsub, i32 0 5875 ret <2 x double> %res 5876} 5877 5878define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind { 5879; SSE-LABEL: test_mm_subs_epi8: 5880; SSE: # %bb.0: 5881; SSE-NEXT: psubsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe8,0xc1] 5882; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5883; 5884; AVX1-LABEL: test_mm_subs_epi8: 5885; AVX1: # %bb.0: 5886; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe8,0xc1] 5887; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5888; 5889; AVX512-LABEL: test_mm_subs_epi8: 5890; AVX512: # %bb.0: 5891; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] 5892; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5893 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 5894 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 5895 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1) 5896 %bc = bitcast <16 x i8> %res to <2 x i64> 5897 ret <2 x i64> %bc 5898} 5899declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone 5900 5901define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { 5902; SSE-LABEL: test_mm_subs_epi16: 5903; SSE: # %bb.0: 5904; SSE-NEXT: psubsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe9,0xc1] 5905; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5906; 5907; AVX1-LABEL: test_mm_subs_epi16: 5908; AVX1: # %bb.0: 5909; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # 
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu8:
; SSE: # %bb.0:
; SSE-NEXT: psubusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd8,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epu8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epu8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu16:
; SSE: # %bb.0:
; SSE-NEXT: psubusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd9,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_subs_epu16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_subs_epu16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

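; The ucomi tests below exercise EFLAGS handling for unordered (NaN) inputs:
; (v)ucomisd sets PF when the comparison is unordered, so EQ combines sete
; with setnp and NEQ combines setne with setp, while the ordered relations
; map directly onto seta/setae, with the operands commuted for lt/le.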
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomieq_sd:
; SSE: # %bb.0:
; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomieq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomieq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomige_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomige_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomige_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomigt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomigt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomigt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomile_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomile_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomile_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomilt_sd:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomilt_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomilt_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomineq_sd:
; SSE: # %bb.0:
; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_ucomineq_sd:
; AVX1: # %bb.0:
; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_ucomineq_sd:
; AVX512: # %bb.0:
; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

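; The _mm_undefined_* tests return undef, so no instructions are expected
; beyond the return itself.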
define <2 x double> @test_mm_undefined_pd() {
; CHECK-LABEL: test_mm_undefined_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x double> undef
}

define <2 x i64> @test_mm_undefined_si128() {
; CHECK-LABEL: test_mm_undefined_si128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  ret <2 x i64> undef
}

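; The unpack tests lower their shufflevector patterns to the unpck*
; instructions. Note that the 32- and 64-bit integer cases select the
; FP-domain forms (unpckhps/unpcklps, unpckhpd, movlhps), which are
; bitwise-equivalent and, as the encodings below show, a byte shorter
; than their integer counterparts.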
define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi8:
; SSE: # %bb.0:
; SSE-NEXT: punpckhbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x68,0xc1]
; SSE-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x68,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi16:
; SSE: # %bb.0:
; SSE-NEXT: punpckhwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x69,0xc1]
; SSE-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x69,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi32:
; SSE: # %bb.0:
; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi64:
; SSE: # %bb.0:
; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpackhi_pd:
; SSE: # %bb.0:
; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpackhi_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpackhi_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi8:
; SSE: # %bb.0:
; SSE-NEXT: punpcklbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x60,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x60,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi16:
; SSE: # %bb.0:
; SSE-NEXT: punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x61,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi32:
; SSE: # %bb.0:
; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi64:
; SSE: # %bb.0:
; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpacklo_pd:
; SSE: # %bb.0:
; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_unpacklo_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_unpacklo_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}

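; Both the FP and the integer XOR tests are plain bitwise operations, and
; both select the SSE1 xorps form ([0x0f,0x57,0xc1]), presumably for the
; shorter encoding relative to xorpd/pxor.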
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_xor_pd:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_pd:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_pd:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_xor_si128:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_xor_si128:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_xor_si128:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = xor <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

!0 = !{i32 1}
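; !0 above is the !nontemporal marker referenced by the streaming store tests.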