//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
}
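// Illustrative expansion of sse12_fp_scalar (hypothetical name, not part of
// the real instruction tables; actual uses wrap it in defms elsewhere in
// this file):
//
//   defm EX_ADDSS : sse12_fp_scalar<0x58, "addss", fadd, FR32, f32mem>;
//
// would produce EX_ADDSSrr and EX_ADDSSrm. With the default Is2Addr = 1 the
// asm string is the two-address SSE form "addss\t{$src2, $dst|$dst, $src2}";
// with Is2Addr = 0 it is the three-address AVX form that names $src1.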
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                               string asm, string SSEVer, string FPSizeStr,
                               Operand memopr, ComplexPattern mem_cpat,
                               bit Is2Addr = 1> {
  def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, RC:$src2))]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                                          SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, mem_cpat:$src2))]>;
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, d>;
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, d>;
}

/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                               string asm, string SSEVer, string FPSizeStr,
                               X86MemOperand x86memop, PatFrag mem_frag,
                               Domain d, bit Is2Addr = 1> {
  def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, RC:$src2))], d>;
  def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, (mem_frag addr:$src2)))], d>;
}

//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//

// A vector extract of the first f32/f64 position is a subregister copy.
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;

// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))),
          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))),
          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;

def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;

def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
          (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
          (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;
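// For example, a v8f32 -> v4f32 extract_subvector at index 0 compiles to no
// machine instruction: the low 128 bits of a YMM register are its aliased
// XMM register (the low half of ymm0 is xmm0), so only the register class
// of the value changes.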
// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;

// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion.
let Predicates = [HasXMMInt] in {
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion.
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
}

// Alias instructions that map fld0 to pxor for sse.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1 in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>;
}
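// Expansion sketch (per the comment above): after register allocation,
// ExpandPostRAPseudos rewrites these pseudos into a self-xor of the
// allocated register, conceptually
//   %vreg = FsFLD0SS   -->   pxor %xmmN, %xmmN
// which is both cheap and rematerializable, hence the flags on the block.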
//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, neverHasSideEffects = 1 in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
}

def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;


// The same as above, but for AVX. The 256-bit ISA does not support PI,
// and doesn't need it, because on Sandy Bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty.
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
// JIT implementation, which does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isCodeGenOnly = 1, Predicates = [HasAVX] in {
def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                      [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                      [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
}


// AVX has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;

def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
// JIT implementation, which does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                         [(set VR128:$dst, (v4i32 immAllOnesV))]>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
  def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                           [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
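// Codegen sketch for a 256-bit integer zero under the patterns above
// (assuming AVX): V_SET0 materializes a 128-bit zero, e.g.
//   vxorps %xmm0, %xmm0, %xmm0
// and because VEX-encoded 128-bit instructions clear bits 255:128 of the
// corresponding YMM register, %ymm0 is already all zeros; SUBREG_TO_REG
// merely records that fact, it emits nothing.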
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; FsMOVAPSrr/
// FsMOVAPDrr is used instead. Register-to-register movss/movsd is not modeled
// as an INSERT_SUBREG because INSERT_SUBREG requires that the insert be
// implementable in terms of a copy, and, as just mentioned, we don't use
// movss/movsd for copies.
//===----------------------------------------------------------------------===//

class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
  SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
     [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;

// Loading from memory automatically zeroing upper bits.
class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
                    PatFrag mem_pat, string OpcodeStr> :
  SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
     [(set RC:$dst, (mem_pat addr:$src))]>;

// AVX
def VMOVSSrr : sse12_move_rr<FR32, v4f32,
                "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
                VEX_LIG;
def VMOVSDrr : sse12_move_rr<FR64, v2f64,
                "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
                VEX_LIG;

// For the disassembler
let isCodeGenOnly = 1 in {
  def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                        (ins VR128:$src1, FR32:$src2),
                        "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        XS, VEX_4V, VEX_LIG;
  def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                        (ins VR128:$src1, FR64:$src2),
                        "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        XD, VEX_4V, VEX_LIG;
}

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
                 VEX_LIG;
  let AddedComplexity = 20 in
    def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
                   VEX_LIG;
}

def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;

// SSE1 & 2
let Constraints = "$src1 = $dst" in {
  def MOVSSrr : sse12_move_rr<FR32, v4f32,
                              "movss\t{$src2, $dst|$dst, $src2}">, XS;
  def MOVSDrr : sse12_move_rr<FR64, v2f64,
                              "movsd\t{$src2, $dst|$dst, $src2}">, XD;

  // For the disassembler
  let isCodeGenOnly = 1 in {
    def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                         (ins VR128:$src1, FR32:$src2),
                         "movss\t{$src2, $dst|$dst, $src2}", []>, XS;
    def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                         (ins VR128:$src1, FR64:$src2),
                         "movsd\t{$src2, $dst|$dst, $src2}", []>, XD;
  }
}

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;

  let AddedComplexity = 20 in
    def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}

def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
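// A note on the asm string syntax used throughout this file: the braces
// select between the two assembler dialects, "{AT&T operands|Intel
// operands}". For example, "movss\t{$src, $dst|$dst, $src}" prints as
//   movss %xmm1, %xmm0   (AT&T, source first)
//   movss xmm0, xmm1     (Intel, destination first)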
// Patterns
let Predicates = [HasSSE1] in {
  let AddedComplexity = 15 in {
  // Extract the low 32-bit value from one vector and insert it into another.
  def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
            (MOVSSrr (v4f32 VR128:$src1),
                     (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
  def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
            (MOVSSrr (v4i32 VR128:$src1),
                     (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;

  // Move scalar to XMM zero-extended: zero a VR128, then do a MOVSS to the
  // lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (MOVSSrr (v4f32 (V_SET0)),
                     (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (MOVSSrr (v4i32 (V_SET0)),
                     (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
  }

  let AddedComplexity = 20 in {
  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
  }

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (MOVSSmr addr:$dst,
                     (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;

  // Shuffle with MOVSS
  def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
            (MOVSSrr VR128:$src1, FR32:$src2)>;
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr (v4i32 VR128:$src1),
                     (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr (v4f32 VR128:$src1),
                     (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
}
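// For reference: (X86vzmovl x) means "keep the low element of x and zero all
// higher lanes", e.g. for v4f32 it yields <x[0], 0, 0, 0>. That is why the
// patterns above implement it as V_SET0 followed by a MOVSS of the low lane.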
let Predicates = [HasSSE2] in {
  let AddedComplexity = 15 in {
  // Extract the low 64-bit value from one vector and insert it into another.
  def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
            (MOVSDrr (v2f64 VR128:$src1),
                     (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
  def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
            (MOVSDrr (v2i64 VR128:$src1),
                     (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;

  // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
  def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
  def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;

  // Move scalar to XMM zero-extended: zero a VR128, then do a MOVSD to the
  // lower bits.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
  }

  let AddedComplexity = 20 in {
  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  }

  // Extract and store.
  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (MOVSDmr addr:$dst,
                     (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;

  // Shuffle with MOVSD
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
            (MOVSDrr VR128:$src1, FR64:$src2)>;
  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr (v2i64 VR128:$src1),
                     (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr (v2f64 VR128:$src1),
                     (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1,
                     (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1,
                     (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;

  // FIXME: There should be an X86Movsd here instead of an X86Movlps; the
  // problem is during lowering, where it's not possible to recognize the fold
  // because the node has two uses through a bitcast. One use disappears at
  // isel time and the fold opportunity reappears.
  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1,
                     (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1,
                     (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
}
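// A note on SUBREG_TO_REG as used above: (SUBREG_TO_REG (i64 0), X, sub_sd)
// asserts that the bits of the wider register outside the inserted
// subregister are already zero (here, because MOVSDrm zeros the high lanes),
// so the widening costs no extra instruction.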
let Predicates = [HasAVX] in {
  let AddedComplexity = 15 in {
  // Extract the low 32-bit value from one vector and insert it into another.
  def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4f32 VR128:$src1),
                      (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
  def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4i32 VR128:$src1),
                      (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;

  // Extract the low 64-bit value from one vector and insert it into another.
  def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
            (VMOVSDrr (v2f64 VR128:$src1),
                      (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
  def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
            (VMOVSDrr (v2i64 VR128:$src1),
                      (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;

  // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
  def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
  def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;

  // Move scalar to XMM zero-extended: zero a VR128, then do a MOVS{S,D} to
  // the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)),
                      (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)),
                      (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
  }

  let AddedComplexity = 20 in {
  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;

  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types.
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
  }
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
            (SUBREG_TO_REG (i32 0),
                           (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
                           sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
            (SUBREG_TO_REG (i64 0),
                           (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
                           sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSmr addr:$dst,
                      (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSDmr addr:$dst,
                      (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;

  // Shuffle with VMOVSS
  def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
            (VMOVSSrr VR128:$src1, FR32:$src2)>;
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4i32 VR128:$src1),
                      (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4f32 VR128:$src1),
                      (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;

  // Shuffle with VMOVSD
  def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
            (VMOVSDrr VR128:$src1, FR64:$src2)>;
  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr (v2i64 VR128:$src1),
                      (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr (v2f64 VR128:$src1),
                      (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1,
                      (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1,
                      (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;

  // FIXME: There should be an X86Movsd here instead of an X86Movlps; the
  // problem is during lowering, where it's not possible to recognize the fold
  // because the node has two uses through a bitcast. One use disappears at
  // isel time and the fold opportunity reappears.
  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1,
                      (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1,
                      (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (ld_frag addr:$src))], d>;
}

defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                                "movaps", SSEPackedSingle>, TB, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
                                "movapd", SSEPackedDouble>, TB, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
                                "movups", SSEPackedSingle>, TB, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                                "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
                                 "movaps", SSEPackedSingle>, TB, VEX;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
                                 "movapd", SSEPackedDouble>, TB, OpSize, VEX;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
                                 "movups", SSEPackedSingle>, TB, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
                                 "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                               "movaps", SSEPackedSingle>, TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
                               "movapd", SSEPackedDouble>, TB, OpSize;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
                               "movups", SSEPackedSingle>, TB;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                               "movupd", SSEPackedDouble, 0>, TB, OpSize;
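// Alignment note for the mov(a|u)p[sd] definitions above: the aligned forms
// (movaps/movapd) fault if the memory operand is not 16-byte aligned, which
// is the guarantee the alignedload*/alignedstore* fragments encode; the
// unaligned forms (movups/movupd) accept any address.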
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movaps\t{$src, $dst|$dst, $src}",
                     [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movapd\t{$src, $dst|$dst, $src}",
                     [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movups\t{$src, $dst|$dst, $src}",
                     [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movupd\t{$src, $dst|$dst, $src}",
                     [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movaps\t{$src, $dst|$dst, $src}",
                      [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movapd\t{$src, $dst|$dst, $src}",
                      [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movups\t{$src, $dst|$dst, $src}",
                      [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movupd\t{$src, $dst|$dst, $src}",
                      [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;

// For the disassembler
let isCodeGenOnly = 1 in {
  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movups\t{$src, $dst|$dst, $src}", []>, VEX;
  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movups\t{$src, $dst|$dst, $src}", []>, VEX;
  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
}
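// The *_REV variants above are register-to-register moves that use the
// store-form opcodes (0x29/0x11 instead of 0x28/0x10, MRMDestReg). They
// exist only so the disassembler can represent both valid encodings of a
// reg-reg move and are never selected (isCodeGenOnly = 1).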
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
          (VMOVUPSYmr addr:$dst, VR256:$src)>;

def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
          (VMOVUPDYmr addr:$dst, VR256:$src)>;

def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;

// For the disassembler
let isCodeGenOnly = 1 in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", []>;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", []>;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", []>;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", []>;
}

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (VMOVUPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [HasSSE1] in
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (MOVUPSmr addr:$dst, VR128:$src)>;
let Predicates = [HasSSE2] in
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (MOVUPDmr addr:$dst, VR128:$src)>;

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [HasSSE1] in {
  def : Pat<(alignedloadv4i32 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (MOVUPSrm addr:$src)>;
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
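// Example of the aliasing described above: (loadv4i32 addr) selects to
// MOVUPSrm. If its users are integer-domain instructions, the SSE domain
// pass may rewrite it to MOVDQU afterwards; otherwise the FP encoding stands
// and saves one byte relative to movdqu.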
// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX] in {
  // 128-bit load/store
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVUPSrm addr:$src)>;
  def : Pat<(alignedloadv2i64 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (VMOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit load/store
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
}

// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let neverHasSideEffects = 1 in {
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                     "movaps\t{$src, $dst|$dst, $src}", []>;
def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                     "movapd\t{$src, $dst|$dst, $src}", []>;
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                       "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                       "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
}
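// Rationale sketch for the Fs* copies above: a register-to-register
// movss/movsd writes only the low lane and therefore creates a false
// dependency on the destination's previous upper bits; movaps/movapd
// rewrite the whole register, and the upper bits are don't-care for an
// FR32/FR64 value.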
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                     "movaps\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                     "movapd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
let isCodeGenOnly = 1 in {
  def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                         "movaps\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX;
  def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                         "movapd\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX;
}
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_hilo_packed<bits<8> opc, RegisterClass RC,
                                 PatFrag mov_frag, string base_opc,
                                 string asm_opr> {
  def PSrm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [(set RC:$dst,
                  (mov_frag RC:$src1,
                   (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
                SSEPackedSingle>, TB;

  def PDrm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
                                 (scalar_to_vector (loadf64 addr:$src2)))))],
                SSEPackedDouble>, TB, OpSize;
}

let AddedComplexity = 20 in {
  defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
  defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
                                    "\t{$src2, $dst|$dst, $src2}">;
}

def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                   (iPTR 0))), addr:$dst)]>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)]>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)]>;
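// Semantics reminder for the movlp* definitions above: movlps/movlpd load
// or store only the low 64 bits of an XMM register, leaving the high 64
// bits unchanged; on v4f32 that is exactly the <4, 5, 2, 3> shuffle the
// movlp fragment models in the patterns that follow.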
let Predicates = [HasAVX] in {
  let AddedComplexity = 20 in {
    // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
    def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
              (VMOVLPSrm VR128:$src1, addr:$src2)>;
    def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
              (VMOVLPSrm VR128:$src1, addr:$src2)>;
    // vector_shuffle v1, (load v2) <2, 1> using MOVLPD
    def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
              (VMOVLPDrm VR128:$src1, addr:$src2)>;
    def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
              (VMOVLPDrm VR128:$src1, addr:$src2)>;
  }

  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
  def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
                                 VR128:$src2)), addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;

  // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPD
  def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;

  // Shuffle with VMOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlps VR128:$src1,
                 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;

  // Shuffle with VMOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movlpd VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
}

let Predicates = [HasSSE1] in {
  let AddedComplexity = 20 in {
    // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
    def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
              (MOVLPSrm VR128:$src1, addr:$src2)>;
    def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
              (MOVLPSrm VR128:$src1, addr:$src2)>;
  }

  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
  def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
                                 VR128:$src2)), addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;

  // Shuffle with MOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlps VR128:$src1,
                 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
                   addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
}
let Predicates = [HasSSE2] in {
  let AddedComplexity = 20 in {
    // vector_shuffle v1, (load v2) <2, 1> using MOVLPD
    def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
              (MOVLPDrm VR128:$src1, addr:$src2)>;
    def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
              (MOVLPDrm VR128:$src1, addr:$src2)>;
  }

  // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPD
  def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;

  // Shuffle with MOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movlpd VR128:$src1,
                              (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

let AddedComplexity = 20 in {
  defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
  defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
                                    "\t{$src2, $dst|$dst, $src2}">;
}
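// Semantics reminder for the movhp* definitions above: movhps/movhpd move
// 64 bits between memory and the high half of an XMM register, leaving the
// low half untouched, e.g.
//   movhps (mem), %xmm0   ; xmm0[127:64] = mem[63:0], xmm0[63:0] unchanged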
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0, so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                   (unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                           (undef)), (iPTR 0))), addr:$dst)]>,
                     VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                   (v2f64 (unpckh VR128:$src, (undef))),
                                   (iPTR 0))), addr:$dst)]>,
                     VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                         (undef)), (iPTR 0))), addr:$dst)]>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (unpckh VR128:$src, (undef))),
                                 (iPTR 0))), addr:$dst)]>;

let Predicates = [HasAVX] in {
  // VMOVHPS patterns
  def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;

  // FIXME: This should be matched by an X86Movlhpd instead of an X86Unpcklpd;
  // the problem is during lowering, where it's not possible to recognize the
  // load fold because the node has two uses through a bitcast. One use
  // disappears at isel time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
                     (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  // FIXME: This should be matched by an X86Movhpd instead. Same as above.
  def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
                     (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (f64 (vector_extract
              (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
            (VMOVHPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (f64 (vector_extract
              (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [HasSSE1] in {
  // MOVHPS patterns
  def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (f64 (vector_extract
              (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [HasSSE2] in {
  // FIXME: This should be matched by an X86Movlhpd instead of an X86Unpcklpd;
  // the problem is during lowering, where it's not possible to recognize the
  // load fold because the node has two uses through a bitcast. One use
  // disappears at isel time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
                     (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  // FIXME: This should be matched by an X86Movhpd instead. Same as above.
  def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
                     (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (f64 (vector_extract
              (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

let AddedComplexity = 20 in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V;
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
                        VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
}

let Predicates = [HasAVX] in {
  // MOVLHPS patterns
  let AddedComplexity = 20 in {
    def : Pat<(v4f32 (movddup VR128:$src, (undef))),
              (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
    def : Pat<(v2i64 (movddup VR128:$src, (undef))),
              (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;

    // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
    def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
              (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
  }
  def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;

  // MOVHLPS patterns
  let AddedComplexity = 20 in {
    // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
    def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
              (VMOVHLPSrr VR128:$src1, VR128:$src2)>;

    // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
    def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
              (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
    def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
              (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
  }

  def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
}
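// Lane reference for the shuffle masks above (v4f32 indices, where 0-3 name
// elements of src1 and 4-7 name elements of src2):
//   movlhps: dst = <src1[0], src1[1], src2[0], src2[1]>   mask <0, 1, 4, 5>
//   movhlps: dst = <src2[2], src2[3], src1[2], src1[3]>   mask <6, 7, 2, 3>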
              (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
    def : Pat<(v2i64 (movddup VR128:$src, (undef))),
              (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;

    // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
    def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
              (MOVLHPSrr VR128:$src1, VR128:$src2)>;
  }
  def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;

  // MOVHLPS patterns
  let AddedComplexity = 20 in {
    // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
    def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
              (MOVHLPSrr VR128:$src1, VR128:$src2)>;

    // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
    def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
              (MOVHLPSrr VR128:$src1, VR128:$src1)>;
    def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
              (MOVHLPSrr VR128:$src1, VR128:$src1)>;
  }

  def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (MOVHLPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (MOVHLPSrr VR128:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                       string asm> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
}

multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>;
}

multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                       string asm, Domain d> {
  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
              [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
}

multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}

defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                              "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
                              VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                              "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
$dst|$dst, $src}">, XS, VEX, 1347 VEX_W, VEX_LIG; 1348defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, 1349 "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, 1350 VEX_LIG; 1351defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, 1352 "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, 1353 VEX, VEX_W, VEX_LIG; 1354 1355// The assembler can recognize rr 64-bit instructions by seeing a rxx 1356// register, but the same isn't true when only using memory operands, 1357// provide other assembly "l" and "q" forms to address this explicitly 1358// where appropriate to do so. 1359defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS, 1360 VEX_4V, VEX_LIG; 1361defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, 1362 VEX_4V, VEX_W, VEX_LIG; 1363defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, 1364 VEX_4V, VEX_LIG; 1365defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, 1366 VEX_4V, VEX_LIG; 1367defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, 1368 VEX_4V, VEX_W, VEX_LIG; 1369 1370let Predicates = [HasAVX] in { 1371 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), 1372 (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; 1373 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), 1374 (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>; 1375 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), 1376 (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; 1377 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), 1378 (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>; 1379 1380 def : Pat<(f32 (sint_to_fp GR32:$src)), 1381 (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 1382 def : Pat<(f32 (sint_to_fp GR64:$src)), 1383 (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>; 1384 def : Pat<(f64 (sint_to_fp GR32:$src)), 1385 (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 1386 def : Pat<(f64 (sint_to_fp GR64:$src)), 1387 (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>; 1388} 1389 1390defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, 1391 "cvttss2si\t{$src, $dst|$dst, $src}">, XS; 1392defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, 1393 "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; 1394defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, 1395 "cvttsd2si\t{$src, $dst|$dst, $src}">, XD; 1396defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, 1397 "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W; 1398defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, 1399 "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS; 1400defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, 1401 "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; 1402defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, 1403 "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD; 1404defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, 1405 "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W; 1406 1407// Conversion Instructions Intrinsics - Match intrinsics which expect MM 1408// and/or XMM operand(s). 

multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
                          string asm> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int SrcRC:$src))]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
}

multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                    PatFrag ld_frag, string asm, bit Is2Addr = 1> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}

defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
                      f128mem, load, "cvtsd2si">, XD, VEX;
defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
                      int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
                      XD, VEX, VEX_W;

// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
// in their names. Get rid of this hack or rename the intrinsics; there are
// several instructions that only match with the intrinsic form. Why create
// duplicates just to let them be recognized by the assembler?
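// For example, Int_VCVTSD2SI above carries the actual selection pattern,
// while the pattern-less VCVTSD2SI below seemingly exists only so that plain
//   cvtsd2si %xmm0, %eax
// can be matched by the assembler despite the ignored _Int/Int_ names.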
defm VCVTSD2SI   : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
                     "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
                     "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W,
                     VEX_LIG;

defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
                               f128mem, load, "cvtsd2si{l}">, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
                                 f128mem, load, "cvtsd2si{q}">, XD, REX_W;


defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
          int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
          int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
          VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
          int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
          int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
          VEX_4V, VEX_W;

let Constraints = "$src1 = $dst" in {
  defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        int_x86_sse_cvtsi2ss, i32mem, loadi32,
                        "cvtsi2ss">, XS;
  defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        int_x86_sse_cvtsi642ss, i64mem, loadi64,
                        "cvtsi2ss{q}">, XS, REX_W;
  defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        int_x86_sse2_cvtsi2sd, i32mem, loadi32,
                        "cvtsi2sd">, XD;
  defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        int_x86_sse2_cvtsi642sd, i64mem, loadi64,
                        "cvtsi2sd">, XD, REX_W;
}

/// SSE 1 Only

// Aliases for intrinsics
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                     f32mem, load, "cvttss2si">, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                       int_x86_sse_cvttss2si64, f32mem, load,
                                       "cvttss2si">, XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                     f128mem, load, "cvttsd2si">, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                       int_x86_sse2_cvttsd2si64, f128mem, load,
                                       "cvttsd2si">, XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                    f32mem, load, "cvttss2si">, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                      int_x86_sse_cvttss2si64, f32mem, load,
                                      "cvttss2si{q}">, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                    f128mem, load, "cvttsd2si">, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                      int_x86_sse2_cvttsd2si64, f128mem, load,
                                      "cvttsd2si{q}">, XD, REX_W;

let Pattern = []<dag> in {
defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
                               "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
                               VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
                               "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
                               VEX_W, VEX_LIG;
defm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
                               "cvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle>, TB, VEX;
defm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
                               "cvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle>, TB, VEX;
}

let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
                            "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
                              "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle>, TB; /* PD SSE3 form is available */
}

let Predicates = [HasSSE1] in {
  def : Pat<(int_x86_sse_cvtss2si VR128:$src),
            (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
  def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
            (CVTSS2SIrm addr:$src)>;
  def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
            (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
  def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
            (CVTSS2SI64rm addr:$src)>;
}

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_cvtss2si VR128:$src),
            (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
  def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
            (VCVTSS2SIrm addr:$src)>;
  def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
            (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
  def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
            (VCVTSS2SI64rm addr:$src)>;
}

/// SSE 2 Only

// Convert scalar double to scalar single
def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                        (ins FR64:$src1, FR64:$src2),
                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        VEX_4V, VEX_LIG;
let mayLoad = 1 in
def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                     (ins FR64:$src1, f64mem:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;

def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
          Requires<[HasAVX]>;

def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
                    Requires<[HasSSE2, OptForSize]>;

defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
                      int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
                      XS, VEX_4V;
let Constraints = "$src1 = $dst" in
defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
                     int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;

// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR32:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR32:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;

let Predicates = [HasAVX] in {
  def : Pat<(f64 (fextend FR32:$src)),
            (VCVTSS2SDrr FR32:$src, FR32:$src)>;
  def : Pat<(fextend (loadf32 addr:$src)),
            (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(extloadf32 addr:$src),
            (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
}

def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
          Requires<[HasAVX, OptForSpeed]>;

def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))]>, XS,
                 Requires<[HasSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
                 Requires<[HasSSE2, OptForSize]>;

// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
// combine.
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
def : Pat<(fextend (loadf32 addr:$src)),
          (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
def : Pat<(extloadf32 addr:$src),
          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;

def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
                                         VR128:$src2))]>, XS, VEX_4V,
                      Requires<[HasAVX]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
                      "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
                                         (load addr:$src2)))]>, XS, VEX_4V,
                      Requires<[HasAVX]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
                                         VR128:$src2))]>, XS,
                      Requires<[HasSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
                      "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
                                         (load addr:$src2)))]>, XS,
                      Requires<[HasSSE2]>;
}

// Convert doubleword to packed single/double fp
// SSE2 instructions without OpSize prefix
def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
                     TB, VEX, Requires<[HasAVX]>;
def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
                                        (bitconvert (memopv2i64 addr:$src))))]>,
                     TB, VEX, Requires<[HasAVX]>;
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
                     TB, Requires<[HasSSE2]>;
def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
                                        (bitconvert (memopv2i64 addr:$src))))]>,
                     TB, Requires<[HasSSE2]>;

// FIXME: why is the non-intrinsic version described as SSE3?
// SSE2 instructions with XS prefix
def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
                     XS, VEX, Requires<[HasAVX]>;
def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
                                        (bitconvert (memopv2i64 addr:$src))))]>,
                     XS, VEX, Requires<[HasAVX]>;
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
                     XS, Requires<[HasSSE2]>;
def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
                                        (bitconvert (memopv2i64 addr:$src))))]>,
                     XS, Requires<[HasSSE2]>;


// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;

def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
                        VEX;
def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
                         (ins f128mem:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtps2dq
                                            (memop addr:$src)))]>, VEX;
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq
                                           (memop addr:$src)))]>;

// SSE2 packed instructions with XD prefix
def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                     XD, VEX, Requires<[HasAVX]>;
def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
                                          (memop addr:$src)))]>,
                     XD, VEX, Requires<[HasAVX]>;
def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                     XD, Requires<[HasSSE2]>;
def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
                                          (memop addr:$src)))]>,
                     XD, Requires<[HasSSE2]>;


// Convert packed single/double fp to doubleword, with truncation
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
let mayLoad = 1 in
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
let mayLoad = 1 in
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttps2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (int_x86_sse2_cvttps2dq VR128:$src))]>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvttps2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;

def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (int_x86_sse2_cvttps2dq VR128:$src))]>,
                      XS, VEX, Requires<[HasAVX]>;
def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "vcvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvttps2dq
                                           (memop addr:$src)))]>,
                      XS, VEX, Requires<[HasAVX]>;

let Predicates = [HasSSE2] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (Int_CVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (CVTTPS2DQrr VR128:$src)>;
}

let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (Int_VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (VCVTTPS2DQrr VR128:$src)>;
  def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
            (VCVTTPS2DQYrr VR256:$src)>;
}

def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX;
let isCodeGenOnly = 1 in
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                           (memop addr:$src)))]>, VEX;
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                         (memop addr:$src)))]>;

// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
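// For instance, "vcvttpd2dq (%rax), %xmm0" alone does not say whether the
// memory source is 128 or 256 bits wide; the "x"/"y"-suffixed forms below
// (vcvttpd2dqx / vcvttpd2dqy at the assembly level) pin the source width.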
def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                          "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;

// XMM only
def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;

// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;

// Convert packed single to packed double
let Predicates = [HasAVX] in {
                  // SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;

def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
                     TB, VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                       "vcvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
                                          (load addr:$src)))]>,
                     TB, VEX, Requires<[HasAVX]>;
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
                     TB, Requires<[HasSSE2]>;
def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
                                          (load addr:$src)))]>,
                     TB, Requires<[HasSSE2]>;

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
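// As above: a bare "vcvtpd2ps (%rax), %xmm0" cannot encode the source width,
// hence the "x"/"y"-suffixed cvtpd2ps forms that follow.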
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;

// XMM only
def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;

// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;


def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
                         (ins f128mem:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                            (memop addr:$src)))]>;
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                            (memop addr:$src)))]>;

// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsic matching to use patterns like the
// ones below whenever possible, to avoid declaring two versions of each one.
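// That is, prefer a single pattern-less instruction definition plus
//   def : Pat<(intrinsic ...), (Instr ...)>;
// mappings such as the ones below, rather than a second Int_* instruction
// that hard-codes the intrinsic in its own pattern.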
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
          (VCVTDQ2PSYrr VR256:$src)>;
def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
          (VCVTDQ2PSYrm addr:$src)>;

def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
          (VCVTPD2PSYrr VR256:$src)>;
def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
          (VCVTPD2PSYrm addr:$src)>;

def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
          (VCVTPS2DQYrr VR256:$src)>;
def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
          (VCVTPS2DQYrm addr:$src)>;

def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
          (VCVTPS2PDYrr VR128:$src)>;
def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
          (VCVTPS2PDYrm addr:$src)>;

def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
          (VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
          (VCVTTPD2DQYrm addr:$src)>;

def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
          (VCVTTPS2DQYrr VR256:$src)>;
def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
          (VCVTTPS2DQYrm addr:$src)>;

// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
          (VCVTPD2PSYrr VR256:$src)>;
def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
          (VCVTPD2PSYrm addr:$src)>;

def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
          (VCVTPS2PDYrr VR128:$src)>;
def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
          (VCVTPS2PDYrm addr:$src)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            SDNode OpNode, ValueType VT, PatFrag ld_frag,
                            string asm, string asm_alt> {
  def rr : SIi8<0xC2, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>;
  def rm : SIi8<0xC2, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1),
                                       (ld_frag addr:$src2), imm:$cc))]>;

  // Accept explicit immediate argument form instead of comparison code.
  let neverHasSideEffects = 1 in {
    def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>;
    let mayLoad = 1 in
    def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>;
  }
}

defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
                 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XS, VEX_4V, VEX_LIG;
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
                 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XD, VEX_4V, VEX_LIG;

let Constraints = "$src1 = $dst" in {
  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
                  "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
                  XS;
  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
                  "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
                  XD;
}

multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
                                Intrinsic Int, string asm> {
  def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       VR128:$src, imm:$cc))]>;
  def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       (load addr:$src), imm:$cc))]>;
}

// Aliases to match intrinsics which expect XMM operand(s).
defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
                    "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
                    XS, VEX_4V;
defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
                    "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
                    XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
  defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
                       "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
  defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
                       "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
}


// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                         ValueType vt, X86MemOperand x86memop,
                         PatFrag ld_frag, string OpcodeStr, Domain d> {
  def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
  def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1),
                                   (ld_frag addr:$src2)))], d>;
}

let Defs = [EFLAGS] in {
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                                "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
                                VEX_LIG;
  let Pattern = []<dag> in {
    defm VCOMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                  "comiss", SSEPackedSingle>, TB, VEX,
                                  VEX_LIG;
    defm VCOMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                  "comisd", SSEPackedDouble>, TB, OpSize, VEX,
                                  VEX_LIG;
  }

  defm Int_VUCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                            load, "ucomiss", SSEPackedSingle>, TB, VEX;
  defm Int_VUCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                            load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;

  defm Int_VCOMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
                            load, "comiss", SSEPackedSingle>, TB, VEX;
  defm Int_VCOMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
                            load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss", SSEPackedSingle>, TB;
  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                                "ucomisd", SSEPackedDouble>, TB, OpSize;

  let Pattern = []<dag> in {
    defm COMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                 "comiss", SSEPackedSingle>, TB;
    defm COMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                 "comisd", SSEPackedDouble>, TB, OpSize;
  }

  defm Int_UCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                            load, "ucomiss", SSEPackedSingle>, TB;
  defm Int_UCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                            load, "ucomisd", SSEPackedDouble>, TB, OpSize;

  defm Int_COMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
                                   "comiss", SSEPackedSingle>, TB;
  defm Int_COMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
                                   "comisd", SSEPackedDouble>, TB, OpSize;
} // Defs = [EFLAGS]

// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                            Intrinsic Int, string asm, string asm_alt,
                            Domain d> {
  let isAsmParserOnly = 1 in {
    def rri : PIi8<0xC2, MRMSrcReg,
               (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
               [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>;
    def rmi : PIi8<0xC2, MRMSrcMem,
               (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm,
               [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>;
  }

  // Accept explicit immediate argument form instead of comparison code.
  def rri_alt : PIi8<0xC2, MRMSrcReg,
                 (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
                 asm_alt, [], d>;
  def rmi_alt : PIi8<0xC2, MRMSrcMem,
                 (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc),
                 asm_alt, [], d>;
}

defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle>, TB, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle>, TB, VEX_4V;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
  defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
                 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SSEPackedSingle>, TB;
  defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
                 "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SSEPackedDouble>, TB, OpSize;
}

let Predicates = [HasSSE1] in {
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}

let Predicates = [HasSSE2] in {
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
}

let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;

def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//

/// sse12_shuffle - sse 1 & 2 shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         Domain d, bit IsConvertibleToThreeAddress = 0> {
  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
                   [(set RC:$dst, (vt (shufp:$src3
                                   RC:$src1, (mem_frag addr:$src2))))], d>;
  let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
    def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
                     [(set RC:$dst,
                           (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
}

defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           memopv4f32, SSEPackedSingle>, TB, VEX_4V;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           memopv8f32, SSEPackedSingle>, TB, VEX_4V;
defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;

let Constraints = "$src1 = $dst" in {
  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
                    TB;
  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv2f64, SSEPackedDouble>, TB, OpSize;
}

let Predicates = [HasSSE1] in {
  def : Pat<(v4f32 (X86Shufps VR128:$src1,
                       (memopv4f32 addr:$src2), (i8 imm:$imm))),
            (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufps VR128:$src1,
                       (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
            (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
  // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
  // fall back to this for SSE1)
  def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
            (SHUFPSrri VR128:$src2, VR128:$src1,
                       (SHUFFLE_get_shuf_imm VR128:$src3))>;
  // Special unary SHUFPSrri case.
  def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
            (SHUFPSrri VR128:$src1, VR128:$src1,
                       (SHUFFLE_get_shuf_imm VR128:$src3))>;
}

let Predicates = [HasSSE2] in {
  // Special binary v4i32 shuffle cases with SHUFPS.
  def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
            (SHUFPSrri VR128:$src1, VR128:$src2,
                       (SHUFFLE_get_shuf_imm VR128:$src3))>;
  def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
                       (bc_v4i32 (memopv2i64 addr:$src2)))),
            (SHUFPSrmi VR128:$src1, addr:$src2,
                       (SHUFFLE_get_shuf_imm VR128:$src3))>;
  // Special unary SHUFPDrri cases.
  def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
            (SHUFPDrri VR128:$src1, VR128:$src1,
                       (SHUFFLE_get_shuf_imm VR128:$src3))>;
  def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
            (SHUFPDrri VR128:$src1, VR128:$src1,
                       (SHUFFLE_get_shuf_imm VR128:$src3))>;
  // Special binary v2i64 shuffle cases using SHUFPDrri.
  def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
            (SHUFPDrri VR128:$src1, VR128:$src2,
                       (SHUFFLE_get_shuf_imm VR128:$src3))>;
  // Generic SHUFPD patterns
  def : Pat<(v2f64 (X86Shufps VR128:$src1,
                       (memopv2f64 addr:$src2), (i8 imm:$imm))),
            (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
  def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
}

let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (X86Shufps VR128:$src1,
                       (memopv4f32 addr:$src2), (i8 imm:$imm))),
            (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufps VR128:$src1,
                       (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
            (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
  // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
  // fall back to this for SSE1)
  def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
            (VSHUFPSrri VR128:$src2, VR128:$src1,
                        (SHUFFLE_get_shuf_imm VR128:$src3))>;
  // Special unary SHUFPSrri case.
  def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
            (VSHUFPSrri VR128:$src1, VR128:$src1,
                        (SHUFFLE_get_shuf_imm VR128:$src3))>;
  // Special binary v4i32 shuffle cases with SHUFPS.
  def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
            (VSHUFPSrri VR128:$src1, VR128:$src2,
                        (SHUFFLE_get_shuf_imm VR128:$src3))>;
  def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
                       (bc_v4i32 (memopv2i64 addr:$src2)))),
            (VSHUFPSrmi VR128:$src1, addr:$src2,
                        (SHUFFLE_get_shuf_imm VR128:$src3))>;
  // Special unary SHUFPDrri cases.
  def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
            (VSHUFPDrri VR128:$src1, VR128:$src1,
                        (SHUFFLE_get_shuf_imm VR128:$src3))>;
  def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
            (VSHUFPDrri VR128:$src1, VR128:$src1,
                        (SHUFFLE_get_shuf_imm VR128:$src3))>;
  // Special binary v2i64 shuffle cases using SHUFPDrri.
  def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
            (VSHUFPDrri VR128:$src1, VR128:$src2,
                        (SHUFFLE_get_shuf_imm VR128:$src3))>;

  def : Pat<(v2f64 (X86Shufps VR128:$src1,
                       (memopv2f64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
  def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;

  // 256-bit patterns
  def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v8i32 (X86Shufps VR256:$src1,
                      (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
            (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;

  def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v8f32 (X86Shufps VR256:$src1,
                      (memopv8f32 addr:$src2), (i8 imm:$imm))),
            (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;

  def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v4i64 (X86Shufpd VR256:$src1,
                      (memopv4i64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;

  def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v4f64 (X86Shufpd VR256:$src1,
                      (memopv4f64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack Instructions
//===----------------------------------------------------------------------===//

/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   Domain d> {
    def rr : PI<opc, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1, RC:$src2)))], d>;
    def rm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1,
                                       (mem_frag addr:$src2))))], d>;
}

let AddedComplexity = 10 in {
  defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
        VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SSEPackedSingle>, TB, VEX_4V;
  defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
        VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SSEPackedDouble>, TB, OpSize, VEX_4V;
  defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
        VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SSEPackedSingle>, TB, VEX_4V;
  defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
        VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SSEPackedDouble>, TB, OpSize, VEX_4V;

  defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
        VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SSEPackedSingle>, TB, VEX_4V;
  defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
        VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SSEPackedDouble>, TB, OpSize, VEX_4V;
  defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
        VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SSEPackedSingle>, TB, VEX_4V;
  defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
        VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        SSEPackedDouble>, TB, OpSize, VEX_4V;

  let Constraints = "$src1 = $dst" in {
    defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
          VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
          SSEPackedSingle>, TB;
    defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
          VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
          SSEPackedDouble>, TB, OpSize;
    defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
          VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
          SSEPackedSingle>, TB;
    defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
          VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
          SSEPackedDouble>, TB, OpSize;
  } // Constraints = "$src1 = $dst"
} // AddedComplexity

let Predicates = [HasSSE1] in {
  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
            (UNPCKLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
            (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
            (UNPCKHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
            (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
}

let Predicates = [HasSSE2] in {
  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
            (UNPCKLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
            (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
            (UNPCKHPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
            (UNPCKHPDrr VR128:$src1, VR128:$src2)>;

  // FIXME: Instead of X86Movddup, there should be an X86Unpcklpd here; the
  // problem is during lowering, where it's not possible to recognize the load
  // fold because it has two uses through a bitcast. One use disappears at isel
  // time and the fold opportunity reappears.
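  // A sketch of why this works: unpcklpd interleaves the low doubles of its
  // two sources, so with both operands equal it duplicates element 0, which
  // is exactly what movddup produces:
  //   unpcklpd %xmm0, %xmm0   ; <x0, x1> becomes <x0, x0>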
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (UNPCKLPDrr VR128:$src, VR128:$src)>;

  let AddedComplexity = 10 in
  def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
            (UNPCKLPDrr VR128:$src, VR128:$src)>;
}

let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
            (VUNPCKLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
            (VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
            (VUNPCKHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
            (VUNPCKHPSrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
            (VUNPCKLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
            (VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
            (VUNPCKHPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
            (VUNPCKHPDrr VR128:$src1, VR128:$src2)>;

  def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;

  // FIXME: Instead of X86Movddup, there should be an X86Unpcklpd here; the
  // problem is during lowering, where it's not possible to recognize the load
  // fold because it has two uses through a bitcast. One use disappears at isel
  // time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (VUNPCKLPDrr VR128:$src, VR128:$src)>;
  let AddedComplexity = 10 in
  def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
            (VUNPCKLPDrr VR128:$src, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign Mask
//===----------------------------------------------------------------------===//

/// sse12_extr_sign_mask - SSE 1 & 2 packed FP sign-mask extraction
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
                                Domain d> {
  def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
                !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                [(set GR32:$dst, (Int RC:$src))], d>;
  def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
                !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
}

defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
                                     SSEPackedSingle>, TB;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
                                     SSEPackedDouble>, TB, OpSize;

def : Pat<(i32 (X86fgetsign FR32:$src)),
          (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
                                       sub_ss))>, Requires<[HasSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
          (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
                                       sub_ss))>, Requires<[HasSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
          (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
                                       sub_sd))>, Requires<[HasSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
          (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
                                       sub_sd))>, Requires<[HasSSE2]>;

let Predicates = [HasAVX] in {
  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
                                        "movmskps", SSEPackedSingle>, TB, VEX;
  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
                                        "movmskpd", SSEPackedDouble>, TB,
                                        OpSize, VEX;
  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
                                         "movmskps", SSEPackedSingle>, TB, VEX;
  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
                                         "movmskpd", SSEPackedDouble>, TB,
                                         OpSize, VEX;

  def : Pat<(i32 (X86fgetsign FR32:$src)),
            (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
                                          sub_ss))>;
  def : Pat<(i64 (X86fgetsign FR32:$src)),
            (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
                                          sub_ss))>;
  def : Pat<(i32 (X86fgetsign FR64:$src)),
            (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
                                          sub_sd))>;
  def : Pat<(i64 (X86fgetsign FR64:$src)),
            (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
                                          sub_sd))>;

  // Assembler Only
  def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
             "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
  def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
             "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
             OpSize, VEX;
  def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
             "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
  def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
             "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
             OpSize, VEX;
}
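
// Illustrative note on the instructions above: movmskps copies the sign bit
// of each packed single into the low bits of the destination GPR (e.g.
// "movmskps %xmm0, %eax" fills EAX[3:0]); movmskpd does the same for the
// two packed doubles.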

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//

/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
///
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode> {
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
              FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V;

  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
        FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V;

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
                f32, f128mem, memopfsf32, SSEPackedSingle>, TB;

    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
                f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
  }
}

// Alias bitwise logical operations using SSE logical ops on packed FP values.
let mayLoad = 0 in {
  defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
  defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
  defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
}

let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
  defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;

/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  // In AVX there is no need to add a pattern for the 128-bit logical rr ps
  // form, because it is promoted to v2i64 and covered by the int version.
  // The pattern is needed only in SSE, because v2i64 is supported on SSE2
  // but not on SSE1.
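  // (Hence the empty rr pattern list passed for the VEX PS form below; isel
  // reaches that instruction through the promoted v2i64 patterns instead.)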
  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
              !strconcat(OpcodeStr, "ps"), f128mem, [],
              [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
                                        (memopv2i64 addr:$src2)))], 0>,
              TB, VEX_4V;

  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
              !strconcat(OpcodeStr, "pd"), f128mem,
              [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                        (bc_v2i64 (v2f64 VR128:$src2))))],
              [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                        (memopv2i64 addr:$src2)))], 0>,
              TB, OpSize, VEX_4V;
  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
              !strconcat(OpcodeStr, "ps"), f128mem,
              [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
              [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
                                        (memopv2i64 addr:$src2)))]>, TB;

    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
              !strconcat(OpcodeStr, "pd"), f128mem,
              [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                        (bc_v2i64 (v2f64 VR128:$src2))))],
              [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                        (memopv2i64 addr:$src2)))]>, TB, OpSize;
  }
}

/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
///
multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode> {
  defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
            !strconcat(OpcodeStr, "ps"), f256mem,
            [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
            [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
                                      (memopv4i64 addr:$src2)))], 0>,
            TB, VEX_4V;

  defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
            !strconcat(OpcodeStr, "pd"), f256mem,
            [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
                                      (bc_v4i64 (v4f64 VR256:$src2))))],
            [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
                                      (memopv4i64 addr:$src2)))], 0>,
            TB, OpSize, VEX_4V;
}

// AVX 256-bit packed logical ops forms
defm VAND  : sse12_fp_packed_logical_y<0x54, "and", and>;
defm VOR   : sse12_fp_packed_logical_y<0x56, "or", or>;
defm VXOR  : sse12_fp_packed_logical_y<0x57, "xor", xor>;
defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;

defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR  : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//

/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
/// vector forms.
///
/// In addition, we have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified (therefore these cannot be commuted).
///
/// These three forms can each be reg+reg or reg+mem.
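///
/// For example (illustrative): instantiating basic_sse12_fp_binop_s<0x58,
/// "add", fadd> under "defm ADD" expands, via sse12_fp_scalar, into the
/// ADDSSrr/ADDSSrm (XS, f32) and ADDSDrr/ADDSDrm (XD, f64) instructions.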
2687/// 2688 2689/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those 2690/// classes below 2691multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 2692 bit Is2Addr = 1> { 2693 defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 2694 OpNode, FR32, f32mem, Is2Addr>, XS; 2695 defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 2696 OpNode, FR64, f64mem, Is2Addr>, XD; 2697} 2698 2699multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 2700 bit Is2Addr = 1> { 2701 let mayLoad = 0 in { 2702 defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, 2703 v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB; 2704 defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, 2705 v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize; 2706 } 2707} 2708 2709multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, 2710 SDNode OpNode> { 2711 let mayLoad = 0 in { 2712 defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, 2713 v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB; 2714 defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, 2715 v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize; 2716 } 2717} 2718 2719multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, 2720 bit Is2Addr = 1> { 2721 defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 2722 !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS; 2723 defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 2724 !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD; 2725} 2726 2727multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr, 2728 bit Is2Addr = 1> { 2729 defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128, 2730 !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32, 2731 SSEPackedSingle, Is2Addr>, TB; 2732 2733 defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128, 2734 !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64, 2735 SSEPackedDouble, Is2Addr>, TB, OpSize; 2736} 2737 2738multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> { 2739 defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256, 2740 !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32, 2741 SSEPackedSingle, 0>, TB; 2742 2743 defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256, 2744 !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64, 2745 SSEPackedDouble, 0>, TB, OpSize; 2746} 2747 2748// Binary Arithmetic instructions 2749defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, 2750 basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG; 2751defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, 2752 basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; 2753defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, 2754 basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG; 2755defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, 2756 basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; 2757 2758let isCommutable = 0 in { 2759 defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, 2760 basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG; 2761 defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, 2762 basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; 2763 defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, 2764 basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG; 2765 defm VDIV : 
              basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
              basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
  defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
              basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
  defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
              basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
              basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
              basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
  defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
              basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
  defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
              basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
              basic_sse12_fp_binop_p_y_int<0x5D, "min">,
              basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}

let Constraints = "$src1 = $dst" in {
  defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
             basic_sse12_fp_binop_p<0x58, "add", fadd>,
             basic_sse12_fp_binop_s_int<0x58, "add">;
  defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
             basic_sse12_fp_binop_p<0x59, "mul", fmul>,
             basic_sse12_fp_binop_s_int<0x59, "mul">;

  let isCommutable = 0 in {
    defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
               basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
               basic_sse12_fp_binop_s_int<0x5C, "sub">;
    defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
               basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
               basic_sse12_fp_binop_s_int<0x5E, "div">;
    defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
               basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
               basic_sse12_fp_binop_s_int<0x5F, "max">,
               basic_sse12_fp_binop_p_int<0x5F, "max">;
    defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
               basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
               basic_sse12_fp_binop_s_int<0x5D, "min">,
               basic_sse12_fp_binop_p_int<0x5D, "min">;
  }
}

/// Unop Arithmetic
/// In addition, we have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.

/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F32Int> {
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]>;
  // For scalar unary operations, fold a load into the operation
  // only in OptForSize mode. It eliminates an instruction, but it also
  // eliminates a whole-register clobber (the load), so it introduces a
  // partial register update condition.
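  // (Illustrative note: e.g. "sqrtss (%rax), %xmm0" writes only the low 32
  // bits of %xmm0, so it carries a false dependency on the previous value of
  // %xmm0; a full-width load plus the reg-reg form avoids that.)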
  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
              !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
              [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
            Requires<[HasSSE1, OptForSize]>;
  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))]>;
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
}

/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                !strconcat(OpcodeStr,
                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  let mayLoad = 1 in
  def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
                !strconcat(OpcodeStr,
                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins ssmem:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                        "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
}

/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
  def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
}

/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
                              Intrinsic V4F32Int> {
  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))]>;
  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}

/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
                                Intrinsic V4F32Int> {
  def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (V4F32Int VR256:$src))]>;
  def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
}

/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F64Int> {
  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))]>;
  // See the comments in sse1_fp_unop_s for why this is OptForSize.
  def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
              !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
              [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
            Requires<[HasSSE2, OptForSize]>;
  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))]>;
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
}

/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                !strconcat(OpcodeStr,
                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
                !strconcat(OpcodeStr,
                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, sdmem:$src2),
                    !strconcat(OpcodeStr,
                        "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDNode OpNode> {
  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
}

/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
  def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
}

/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
                              Intrinsic V2F64Int> {
  def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int VR128:$src))]>;
  def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}

/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
                                Intrinsic V2F64Int> {
  def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (V2F64Int VR256:$src))]>;
  def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                     [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
}

let Predicates = [HasAVX] in {
  // Square root.
  defm VSQRT  : sse1_fp_unop_s_avx<0x51, "vsqrt">,
                sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;

  defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
                sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
                sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
                sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
                sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
                sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
                sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
                sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
                VEX;

  // Reciprocal approximations. Note that these typically require refinement
  // in order to obtain suitable precision.
  defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
  defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
                sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
                sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
                sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;

  defm VRCP   : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
  defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
                sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
                sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
                sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
}

def : Pat<(f32 (fsqrt FR32:$src)),
          (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (fsqrt (load addr:$src))),
          (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;
def : Pat<(f64 (fsqrt FR64:$src)),
          (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
def : Pat<(f64 (fsqrt (load addr:$src))),
          (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;

def : Pat<(f32 (X86frsqrt FR32:$src)),
          (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (X86frsqrt (load addr:$src))),
          (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;

def : Pat<(f32 (X86frcp FR32:$src)),
          (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (X86frcp (load addr:$src))),
          (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
                (VSQRTSSr (f32 (IMPLICIT_DEF)),
                          (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
                sub_ss)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
                (VSQRTSDr (f64 (IMPLICIT_DEF)),
                          (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
                sub_sd)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
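
  // Illustrative note: rsqrtss/rcpss (matched below) return roughly 12 bits
  // of precision (Intel documents a relative error of at most 1.5 * 2^-12),
  // so a Newton-Raphson step is the usual refinement when full single
  // precision is required.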
  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
                (VRSQRTSSr (f32 (IMPLICIT_DEF)),
                           (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
                sub_ss)>;
  def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
            (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
                (VRCPSSr (f32 (IMPLICIT_DEF)),
                         (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
                sub_ss)>;
  def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
            (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}

// Square root.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
             sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
             sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
             sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
             sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
             sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp>,
             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;

// There is no f64 version of the reciprocal approximation instructions.

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//

let AddedComplexity = 400 in { // Prefer non-temporal versions
  def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
                        (ins f128mem:$dst, VR128:$src),
                        "movntps\t{$src, $dst|$dst, $src}",
                        [(alignednontemporalstore (v4f32 VR128:$src),
                                                  addr:$dst)]>, VEX;
  def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                        (ins f128mem:$dst, VR128:$src),
                        "movntpd\t{$src, $dst|$dst, $src}",
                        [(alignednontemporalstore (v2f64 VR128:$src),
                                                  addr:$dst)]>, VEX;
  def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
                           (ins f128mem:$dst, VR128:$src),
                           "movntdq\t{$src, $dst|$dst, $src}",
                           [(alignednontemporalstore (v2f64 VR128:$src),
                                                     addr:$dst)]>, VEX;

  let ExeDomain = SSEPackedInt in
  def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
                        (ins f128mem:$dst, VR128:$src),
                        "movntdq\t{$src, $dst|$dst, $src}",
                        [(alignednontemporalstore (v4f32 VR128:$src),
                                                  addr:$dst)]>, VEX;

  def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
            (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;

  def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
                         (ins f256mem:$dst, VR256:$src),
                         "movntps\t{$src, $dst|$dst, $src}",
                         [(alignednontemporalstore (v8f32 VR256:$src),
                                                   addr:$dst)]>, VEX;
  def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                         (ins f256mem:$dst, VR256:$src),
                         "movntpd\t{$src, $dst|$dst, $src}",
                         [(alignednontemporalstore (v4f64 VR256:$src),
                                                   addr:$dst)]>, VEX;
  def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
                            (ins f256mem:$dst, VR256:$src),
                            "movntdq\t{$src, $dst|$dst, $src}",
                            [(alignednontemporalstore (v4f64 VR256:$src),
                                                      addr:$dst)]>, VEX;
  let ExeDomain = SSEPackedInt in
  def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                         (ins f256mem:$dst, VR256:$src),
                         "movntdq\t{$src, $dst|$dst, $src}",
                         [(alignednontemporalstore (v8f32 VR256:$src),
                                                   addr:$dst)]>, VEX;
}

def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
          (VMOVNTDQYmr addr:$dst, VR256:$src)>;
def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
          (VMOVNTPDYmr addr:$dst, VR256:$src)>;
def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
          (VMOVNTPSYmr addr:$dst, VR256:$src)>;

let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;

def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;

let ExeDomain = SSEPackedInt in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;

def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
          (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;

// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "movnti{l}\t{$src, $dst|$dst, $src}",
                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
               TB, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                     "movnti{q}\t{$src, $dst|$dst, $src}",
                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
                  TB, Requires<[HasSSE2]>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;

// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
              TB, Requires<[HasSSE2]>;

// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
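// (Encoding-wise that is just an F3 prefix on NOP, i.e. 0xF3 0x90, which
// pre-SSE2 processors execute as a plain NOP.)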
def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;

// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
               "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;

def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store MXCSR register
//===----------------------------------------------------------------------===//

def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;

def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

let neverHasSideEffects = 1 in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
}
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;

// For Disassembler
let isCodeGenOnly = 1 in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
}

let canFoldAsLoad = 1, mayLoad = 1 in {
def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
let Predicates = [HasAVX] in {
  def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
  def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
}
}

let mayStore = 1 in {
def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}",
                      []>, VEX;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i256mem:$dst, VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
let Predicates = [HasAVX] in {
def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
}
}

let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}", []>;

def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 "movdqu\t{$src, $dst|$dst, $src}",
                 []>, XS, Requires<[HasSSE2]>;

// For Disassembler
let isCodeGenOnly = 1 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", []>;

def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                     "movdqu\t{$src, $dst|$dst, $src}",
                     []>, XS, Requires<[HasSSE2]>;
}

let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                 "movdqu\t{$src, $dst|$dst, $src}",
                 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
               XS, Requires<[HasSSE2]>;
}

let mayStore = 1 in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                 "movdqu\t{$src, $dst|$dst, $src}",
                 [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
               XS, Requires<[HasSSE2]>;
}

// Intrinsic forms of MOVDQU load and store
def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                      "vmovdqu\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                    XS, VEX, Requires<[HasAVX]>;

def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                     "movdqu\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                   XS, Requires<[HasSSE2]>;

} // ExeDomain = SSEPackedInt

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
  def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
            (VMOVDQUYmr addr:$dst, VR256:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            bit IsCommutable = 0, bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
  def rm : PDI<opc, MRMSrcMem, (outs
       VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId VR128:$src1,
                                (bitconvert (memopv2i64 addr:$src2))))]>;
}

multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr, Intrinsic IntId,
                             Intrinsic IntId2, bit Is2Addr = 1> {
  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId VR128:$src1,
                                (bitconvert (memopv2i64 addr:$src2))))]>;
  def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
       (ins VR128:$src1, i32i8imm:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}

/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
  def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
                                (bitconvert (memopv2i64 addr:$src2)))))]>;
}

/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
///
/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
/// to collapse (bitconvert VT to VT) into its operand.
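/// (That is, fold a no-op (v2i64 (bitconvert (v2i64 ...))) node into the
/// value it wraps, which would let the generic class's patterns match here.)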
3402/// 3403multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, 3404 bit IsCommutable = 0, bit Is2Addr = 1> { 3405 let isCommutable = IsCommutable in 3406 def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), 3407 (ins VR128:$src1, VR128:$src2), 3408 !if(Is2Addr, 3409 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3410 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3411 [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>; 3412 def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), 3413 (ins VR128:$src1, i128mem:$src2), 3414 !if(Is2Addr, 3415 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3416 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3417 [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>; 3418} 3419 3420} // ExeDomain = SSEPackedInt 3421 3422// 128-bit Integer Arithmetic 3423 3424let Predicates = [HasAVX] in { 3425defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V; 3426defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V; 3427defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V; 3428defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V; 3429defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V; 3430defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V; 3431defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V; 3432defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V; 3433defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V; 3434 3435// Intrinsic forms 3436defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>, 3437 VEX_4V; 3438defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>, 3439 VEX_4V; 3440defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>, 3441 VEX_4V; 3442defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>, 3443 VEX_4V; 3444defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>, 3445 VEX_4V; 3446defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>, 3447 VEX_4V; 3448defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>, 3449 VEX_4V; 3450defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>, 3451 VEX_4V; 3452defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>, 3453 VEX_4V; 3454defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>, 3455 VEX_4V; 3456defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>, 3457 VEX_4V; 3458defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>, 3459 VEX_4V; 3460defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>, 3461 VEX_4V; 3462defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>, 3463 VEX_4V; 3464defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>, 3465 VEX_4V; 3466defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>, 3467 VEX_4V; 3468defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>, 3469 VEX_4V; 3470defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>, 3471 VEX_4V; 3472defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>, 3473 VEX_4V; 3474} 3475 3476let Constraints = "$src1 = $dst" in { 3477defm PADDB : PDI_binop_rm<0xFC, "paddb", 
                           add, v16i8, 1>;
defm PADDW  : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
defm PADDD  : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
defm PADDQ  : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
defm PSUBB  : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW  : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD  : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ  : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;

// Intrinsic forms
defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
defm PMINUB  : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
defm PMINSW  : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
defm PMAXUB  : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
defm PMAXSW  : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
defm PSADBW  : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;

} // Constraints = "$src1 = $dst"

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

let Predicates = [HasAVX] in {
defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
                                VEX_4V;
defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
                                int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
                                VEX_4V;
defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
                                int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
                                VEX_4V;

defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
                                int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
                                VEX_4V;
defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
                                int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
                                VEX_4V;
defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
                                int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
                                VEX_4V;

defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
                                int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
                                VEX_4V;
defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
                                int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
                                VEX_4V;

defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
defm VPOR  : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>,
             VEX_4V;

let ExeDomain = SSEPackedInt in {
  let neverHasSideEffects = 1 in {
    // 128-bit logical shifts.
    def VPSLLDQri : PDIi8<0x73, MRM7r,
                        (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                        "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        VEX_4V;
    def VPSRLDQri : PDIi8<0x73, MRM3r,
                        (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                        "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        VEX_4V;
    // PSRADQri doesn't exist in SSE[1-3].
  }
  def VPANDNrr : PDI<0xDF, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                          (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;

  def VPANDNrm : PDI<0xDF, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (X86andnp VR128:$src1,
                                       (memopv2i64 addr:$src2)))]>, VEX_4V;
}
}

let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                               int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
                               int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
                               int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;

defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
                               int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
                               int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
                               int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;

defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
                               int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                               int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;

defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
defm POR  : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;

let ExeDomain = SSEPackedInt in {
  let neverHasSideEffects = 1 in {
    // 128-bit logical shifts.
    def PSLLDQri : PDIi8<0x73, MRM7r,
                         (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                         "pslldq\t{$src2, $dst|$dst, $src2}", []>;
    def PSRLDQri : PDIi8<0x73, MRM3r,
                         (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                         "psrldq\t{$src2, $dst|$dst, $src2}", []>;
    // PSRADQri doesn't exist in SSE[1-3].
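    // (Illustrative note: unlike the element-wise shifts above, pslldq and
    // psrldq shift the whole 128-bit register by $src2 *bytes*, shifting in
    // zeros.)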
  }
  def PANDNrr : PDI<0xDF, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "pandn\t{$src2, $dst|$dst, $src2}", []>;

  def PANDNrm : PDI<0xDF, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                    "pandn\t{$src2, $dst|$dst, $src2}", []>;
}
} // Constraints = "$src1 = $dst"

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
            (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
            (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
  def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
            (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
  def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
            (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
            (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;

  // Shift up / down and insert zeros.
  def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
            (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
  def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
            (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
}

let Predicates = [HasSSE2] in {
  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
            (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
            (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
  def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
            (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
  def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
            (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
            (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;

  // Shift up / down and insert zeros.
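  // (BYTE_imm scales the bit count down to the byte count these instructions
  // take; the _bs intrinsic variants above already supply a byte count, so
  // their immediates pass through unchanged.)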
  def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
            (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
  def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
            (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//

let Predicates = [HasAVX] in {
  defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
                                   0>, VEX_4V;
  defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
                                   0>, VEX_4V;
  defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
                                   0>, VEX_4V;
  defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
                                   0>, VEX_4V;
  defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
                                   0>, VEX_4V;
  defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
                                   0>, VEX_4V;

  def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
            (VPCMPEQBrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
            (VPCMPEQBrm VR128:$src1, addr:$src2)>;
  def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
            (VPCMPEQWrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
            (VPCMPEQWrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
            (VPCMPEQDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
            (VPCMPEQDrm VR128:$src1, addr:$src2)>;

  def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
            (VPCMPGTBrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
            (VPCMPGTBrm VR128:$src1, addr:$src2)>;
  def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
            (VPCMPGTWrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
            (VPCMPGTWrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
            (VPCMPGTDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
            (VPCMPGTDrm VR128:$src1, addr:$src2)>;
}

let Constraints = "$src1 = $dst" in {
  defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
  defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
  defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
  defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
  defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
  defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
} // Constraints = "$src1 = $dst"

let Predicates = [HasSSE2] in {
  def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
            (PCMPEQBrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
            (PCMPEQBrm VR128:$src1, addr:$src2)>;
  def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
            (PCMPEQWrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
            (PCMPEQWrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
            (PCMPEQDrr
VR128:$src1, VR128:$src2)>; 3721 def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), 3722 (PCMPEQDrm VR128:$src1, addr:$src2)>; 3723 3724 def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), 3725 (PCMPGTBrr VR128:$src1, VR128:$src2)>; 3726 def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), 3727 (PCMPGTBrm VR128:$src1, addr:$src2)>; 3728 def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), 3729 (PCMPGTWrr VR128:$src1, VR128:$src2)>; 3730 def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), 3731 (PCMPGTWrm VR128:$src1, addr:$src2)>; 3732 def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), 3733 (PCMPGTDrr VR128:$src1, VR128:$src2)>; 3734 def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), 3735 (PCMPGTDrm VR128:$src1, addr:$src2)>; 3736} 3737 3738//===---------------------------------------------------------------------===// 3739// SSE2 - Packed Integer Pack Instructions 3740//===---------------------------------------------------------------------===// 3741 3742let Predicates = [HasAVX] in { 3743defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, 3744 0, 0>, VEX_4V; 3745defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, 3746 0, 0>, VEX_4V; 3747defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, 3748 0, 0>, VEX_4V; 3749} 3750 3751let Constraints = "$src1 = $dst" in { 3752defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>; 3753defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>; 3754defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; 3755} // Constraints = "$src1 = $dst" 3756 3757//===---------------------------------------------------------------------===// 3758// SSE2 - Packed Integer Shuffle Instructions 3759//===---------------------------------------------------------------------===// 3760 3761let ExeDomain = SSEPackedInt in { 3762multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag, 3763 PatFrag bc_frag> { 3764def ri : Ii8<0x70, MRMSrcReg, 3765 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), 3766 !strconcat(OpcodeStr, 3767 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3768 [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1, 3769 (undef))))]>; 3770def mi : Ii8<0x70, MRMSrcMem, 3771 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), 3772 !strconcat(OpcodeStr, 3773 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3774 [(set VR128:$dst, (vt (pshuf_frag:$src2 3775 (bc_frag (memopv2i64 addr:$src1)), 3776 (undef))))]>; 3777} 3778} // ExeDomain = SSEPackedInt 3779 3780let Predicates = [HasAVX] in { 3781 let AddedComplexity = 5 in 3782 defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize, 3783 VEX; 3784 3785 // SSE2 with ImmT == Imm8 and XS prefix. 3786 defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS, 3787 VEX; 3788 3789 // SSE2 with ImmT == Imm8 and XD prefix. 3790 defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD, 3791 VEX; 3792 3793 let AddedComplexity = 5 in 3794 def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), 3795 (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; 3796 // Unary v4f32 shuffle with VPSHUF* in order to fold a load. 
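// (Illustrative sketch of the immediate encoding, not a definition: the i8
// operand packs four 2-bit source-dword selectors, lowest result element in
// the lowest bits, so e.g. "vpshufd $0x1B, %xmm0, %xmm1" reverses the four
// dwords, since 0x1B == 0b00011011.)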
3797 def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), 3798 (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; 3799 3800 def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), 3801 (i8 imm:$imm))), 3802 (VPSHUFDmi addr:$src1, imm:$imm)>; 3803 def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), 3804 (i8 imm:$imm))), 3805 (VPSHUFDmi addr:$src1, imm:$imm)>; 3806 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 3807 (VPSHUFDri VR128:$src1, imm:$imm)>; 3808 def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 3809 (VPSHUFDri VR128:$src1, imm:$imm)>; 3810 def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), 3811 (VPSHUFHWri VR128:$src, imm:$imm)>; 3812 def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), 3813 (i8 imm:$imm))), 3814 (VPSHUFHWmi addr:$src, imm:$imm)>; 3815 def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), 3816 (VPSHUFLWri VR128:$src, imm:$imm)>; 3817 def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), 3818 (i8 imm:$imm))), 3819 (VPSHUFLWmi addr:$src, imm:$imm)>; 3820} 3821 3822let Predicates = [HasSSE2] in { 3823 let AddedComplexity = 5 in 3824 defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize; 3825 3826 // SSE2 with ImmT == Imm8 and XS prefix. 3827 defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS; 3828 3829 // SSE2 with ImmT == Imm8 and XD prefix. 3830 defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD; 3831 3832 let AddedComplexity = 5 in 3833 def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), 3834 (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; 3835 // Unary v4f32 shuffle with PSHUF* in order to fold a load. 3836 def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), 3837 (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; 3838 3839 def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), 3840 (i8 imm:$imm))), 3841 (PSHUFDmi addr:$src1, imm:$imm)>; 3842 def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), 3843 (i8 imm:$imm))), 3844 (PSHUFDmi addr:$src1, imm:$imm)>; 3845 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 3846 (PSHUFDri VR128:$src1, imm:$imm)>; 3847 def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 3848 (PSHUFDri VR128:$src1, imm:$imm)>; 3849 def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), 3850 (PSHUFHWri VR128:$src, imm:$imm)>; 3851 def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), 3852 (i8 imm:$imm))), 3853 (PSHUFHWmi addr:$src, imm:$imm)>; 3854 def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), 3855 (PSHUFLWri VR128:$src, imm:$imm)>; 3856 def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), 3857 (i8 imm:$imm))), 3858 (PSHUFLWmi addr:$src, imm:$imm)>; 3859} 3860 3861//===---------------------------------------------------------------------===// 3862// SSE2 - Packed Integer Unpack Instructions 3863//===---------------------------------------------------------------------===// 3864 3865let ExeDomain = SSEPackedInt in { 3866multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 3867 SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> { 3868 def rr : PDI<opc, MRMSrcReg, 3869 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 3870 !if(Is2Addr, 3871 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 3872 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3873 [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>; 3874 def rm : PDI<opc, MRMSrcMem, 3875 (outs 
VR128:$dst), (ins VR128:$src1, i128mem:$src2), 3876 !if(Is2Addr, 3877 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 3878 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3879 [(set VR128:$dst, (OpNode VR128:$src1, 3880 (bc_frag (memopv2i64 3881 addr:$src2))))]>; 3882} 3883 3884let Predicates = [HasAVX] in { 3885 defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, 3886 bc_v16i8, 0>, VEX_4V; 3887 defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd, 3888 bc_v8i16, 0>, VEX_4V; 3889 defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, 3890 bc_v4i32, 0>, VEX_4V; 3891 3892 /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen 3893 /// knew to collapse (bitconvert VT to VT) into its operand. 3894 def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, 3895 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 3896 "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3897 [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, 3898 VR128:$src2)))]>, VEX_4V; 3899 def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, 3900 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 3901 "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3902 [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, 3903 (memopv2i64 addr:$src2))))]>, VEX_4V; 3904 3905 defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, 3906 bc_v16i8, 0>, VEX_4V; 3907 defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd, 3908 bc_v8i16, 0>, VEX_4V; 3909 defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, 3910 bc_v4i32, 0>, VEX_4V; 3911 3912 /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen 3913 /// knew to collapse (bitconvert VT to VT) into its operand. 3914 def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, 3915 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 3916 "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3917 [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, 3918 VR128:$src2)))]>, VEX_4V; 3919 def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, 3920 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 3921 "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 3922 [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, 3923 (memopv2i64 addr:$src2))))]>, VEX_4V; 3924} 3925 3926let Constraints = "$src1 = $dst" in { 3927 defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; 3928 defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; 3929 defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>; 3930 3931 /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen 3932 /// knew to collapse (bitconvert VT to VT) into its operand. 
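// (For reference: punpcklqdq computes dst[63:0] = src1[63:0] and
// dst[127:64] = src2[63:0]; this is why the splat_lo patterns further below
// can splat the low quadword by tying both operands of PUNPCKLQDQrr to the
// same register.)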
3933 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, 3934 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 3935 "punpcklqdq\t{$src2, $dst|$dst, $src2}", 3936 [(set VR128:$dst, 3937 (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>; 3938 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, 3939 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 3940 "punpcklqdq\t{$src2, $dst|$dst, $src2}", 3941 [(set VR128:$dst, 3942 (v2i64 (X86Punpcklqdq VR128:$src1, 3943 (memopv2i64 addr:$src2))))]>; 3944 3945 defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>; 3946 defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>; 3947 defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>; 3948 3949 /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen 3950 /// knew to collapse (bitconvert VT to VT) into its operand. 3951 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, 3952 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 3953 "punpckhqdq\t{$src2, $dst|$dst, $src2}", 3954 [(set VR128:$dst, 3955 (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>; 3956 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, 3957 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 3958 "punpckhqdq\t{$src2, $dst|$dst, $src2}", 3959 [(set VR128:$dst, 3960 (v2i64 (X86Punpckhqdq VR128:$src1, 3961 (memopv2i64 addr:$src2))))]>; 3962} 3963} // ExeDomain = SSEPackedInt 3964 3965// Splat v2f64 / v2i64 3966let AddedComplexity = 10 in { 3967 def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), 3968 (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; 3969 def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), 3970 (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>; 3971} 3972 3973//===---------------------------------------------------------------------===// 3974// SSE2 - Packed Integer Extract and Insert 3975//===---------------------------------------------------------------------===// 3976 3977let ExeDomain = SSEPackedInt in { 3978multiclass sse2_pinsrw<bit Is2Addr = 1> { 3979 def rri : Ii8<0xC4, MRMSrcReg, 3980 (outs VR128:$dst), (ins VR128:$src1, 3981 GR32:$src2, i32i8imm:$src3), 3982 !if(Is2Addr, 3983 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 3984 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 3985 [(set VR128:$dst, 3986 (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>; 3987 def rmi : Ii8<0xC4, MRMSrcMem, 3988 (outs VR128:$dst), (ins VR128:$src1, 3989 i16mem:$src2, i32i8imm:$src3), 3990 !if(Is2Addr, 3991 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 3992 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 3993 [(set VR128:$dst, 3994 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 3995 imm:$src3))]>; 3996} 3997 3998// Extract 3999let Predicates = [HasAVX] in 4000def VPEXTRWri : Ii8<0xC5, MRMSrcReg, 4001 (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), 4002 "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4003 [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), 4004 imm:$src2))]>, TB, OpSize, VEX; 4005def PEXTRWri : PDIi8<0xC5, MRMSrcReg, 4006 (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), 4007 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4008 [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), 4009 imm:$src2))]>; 4010 4011// Insert 4012let Predicates = [HasAVX] in { 4013 defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; 4014 def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), 4015 (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), 4016 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 4017 
[]>, TB, OpSize, VEX_4V; 4018} 4019 4020let Constraints = "$src1 = $dst" in 4021 defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>; 4022 4023} // ExeDomain = SSEPackedInt 4024 4025//===---------------------------------------------------------------------===// 4026// SSE2 - Packed Mask Creation 4027//===---------------------------------------------------------------------===// 4028 4029let ExeDomain = SSEPackedInt in { 4030 4031def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), 4032 "pmovmskb\t{$src, $dst|$dst, $src}", 4033 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; 4034def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), 4035 "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; 4036def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), 4037 "pmovmskb\t{$src, $dst|$dst, $src}", 4038 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; 4039 4040} // ExeDomain = SSEPackedInt 4041 4042//===---------------------------------------------------------------------===// 4043// SSE2 - Conditional Store 4044//===---------------------------------------------------------------------===// 4045 4046let ExeDomain = SSEPackedInt in { 4047 4048let Uses = [EDI] in 4049def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), 4050 (ins VR128:$src, VR128:$mask), 4051 "maskmovdqu\t{$mask, $src|$src, $mask}", 4052 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX; 4053let Uses = [RDI] in 4054def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), 4055 (ins VR128:$src, VR128:$mask), 4056 "maskmovdqu\t{$mask, $src|$src, $mask}", 4057 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX; 4058 4059let Uses = [EDI] in 4060def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4061 "maskmovdqu\t{$mask, $src|$src, $mask}", 4062 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>; 4063let Uses = [RDI] in 4064def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4065 "maskmovdqu\t{$mask, $src|$src, $mask}", 4066 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; 4067 4068} // ExeDomain = SSEPackedInt 4069 4070//===---------------------------------------------------------------------===// 4071// SSE2 - Move Doubleword 4072//===---------------------------------------------------------------------===// 4073 4074//===---------------------------------------------------------------------===// 4075// Move Int Doubleword to Packed Double Int 4076// 4077def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4078 "movd\t{$src, $dst|$dst, $src}", 4079 [(set VR128:$dst, 4080 (v4i32 (scalar_to_vector GR32:$src)))]>, VEX; 4081def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4082 "movd\t{$src, $dst|$dst, $src}", 4083 [(set VR128:$dst, 4084 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 4085 VEX; 4086def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4087 "mov{d|q}\t{$src, $dst|$dst, $src}", 4088 [(set VR128:$dst, 4089 (v2i64 (scalar_to_vector GR64:$src)))]>, VEX; 4090def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), 4091 "mov{d|q}\t{$src, $dst|$dst, $src}", 4092 [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX; 4093 4094def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4095 "movd\t{$src, $dst|$dst, $src}", 4096 [(set VR128:$dst, 4097 (v4i32 (scalar_to_vector GR32:$src)))]>; 4098def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), 
(ins i32mem:$src), 4099 "movd\t{$src, $dst|$dst, $src}", 4100 [(set VR128:$dst, 4101 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>; 4102def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4103 "mov{d|q}\t{$src, $dst|$dst, $src}", 4104 [(set VR128:$dst, 4105 (v2i64 (scalar_to_vector GR64:$src)))]>; 4106def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), 4107 "mov{d|q}\t{$src, $dst|$dst, $src}", 4108 [(set FR64:$dst, (bitconvert GR64:$src))]>; 4109 4110//===---------------------------------------------------------------------===// 4111// Move Int Doubleword to Single Scalar 4112// 4113def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4114 "movd\t{$src, $dst|$dst, $src}", 4115 [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; 4116 4117def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), 4118 "movd\t{$src, $dst|$dst, $src}", 4119 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, 4120 VEX; 4121def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4122 "movd\t{$src, $dst|$dst, $src}", 4123 [(set FR32:$dst, (bitconvert GR32:$src))]>; 4124 4125def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), 4126 "movd\t{$src, $dst|$dst, $src}", 4127 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; 4128 4129//===---------------------------------------------------------------------===// 4130// Move Packed Doubleword Int to Packed Double Int 4131// 4132def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4133 "movd\t{$src, $dst|$dst, $src}", 4134 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), 4135 (iPTR 0)))]>, VEX; 4136def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), 4137 (ins i32mem:$dst, VR128:$src), 4138 "movd\t{$src, $dst|$dst, $src}", 4139 [(store (i32 (vector_extract (v4i32 VR128:$src), 4140 (iPTR 0))), addr:$dst)]>, VEX; 4141def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4142 "movd\t{$src, $dst|$dst, $src}", 4143 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), 4144 (iPTR 0)))]>; 4145def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), 4146 "movd\t{$src, $dst|$dst, $src}", 4147 [(store (i32 (vector_extract (v4i32 VR128:$src), 4148 (iPTR 0))), addr:$dst)]>; 4149 4150//===---------------------------------------------------------------------===// 4151// Move Packed Doubleword Int first element to Doubleword Int 4152// 4153def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4154 "mov{d|q}\t{$src, $dst|$dst, $src}", 4155 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), 4156 (iPTR 0)))]>, 4157 TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; 4158 4159def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4160 "mov{d|q}\t{$src, $dst|$dst, $src}", 4161 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), 4162 (iPTR 0)))]>; 4163 4164//===---------------------------------------------------------------------===// 4165// Bitcast FR64 <-> GR64 4166// 4167let Predicates = [HasAVX] in 4168def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), 4169 "vmovq\t{$src, $dst|$dst, $src}", 4170 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, 4171 VEX; 4172def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), 4173 "mov{d|q}\t{$src, $dst|$dst, $src}", 4174 [(set GR64:$dst, (bitconvert FR64:$src))]>; 4175def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), 
4176 "movq\t{$src, $dst|$dst, $src}", 4177 [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; 4178 4179def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), 4180 "movq\t{$src, $dst|$dst, $src}", 4181 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; 4182def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), 4183 "mov{d|q}\t{$src, $dst|$dst, $src}", 4184 [(set GR64:$dst, (bitconvert FR64:$src))]>; 4185def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), 4186 "movq\t{$src, $dst|$dst, $src}", 4187 [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; 4188 4189//===---------------------------------------------------------------------===// 4190// Move Scalar Single to Double Int 4191// 4192def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), 4193 "movd\t{$src, $dst|$dst, $src}", 4194 [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX; 4195def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), 4196 "movd\t{$src, $dst|$dst, $src}", 4197 [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX; 4198def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), 4199 "movd\t{$src, $dst|$dst, $src}", 4200 [(set GR32:$dst, (bitconvert FR32:$src))]>; 4201def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), 4202 "movd\t{$src, $dst|$dst, $src}", 4203 [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; 4204 4205//===---------------------------------------------------------------------===// 4206// Patterns and instructions to describe movd/movq to XMM register zero-extends 4207// 4208let AddedComplexity = 15 in { 4209def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4210 "movd\t{$src, $dst|$dst, $src}", 4211 [(set VR128:$dst, (v4i32 (X86vzmovl 4212 (v4i32 (scalar_to_vector GR32:$src)))))]>, 4213 VEX; 4214def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4215 "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only 4216 [(set VR128:$dst, (v2i64 (X86vzmovl 4217 (v2i64 (scalar_to_vector GR64:$src)))))]>, 4218 VEX, VEX_W; 4219} 4220let AddedComplexity = 15 in { 4221def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4222 "movd\t{$src, $dst|$dst, $src}", 4223 [(set VR128:$dst, (v4i32 (X86vzmovl 4224 (v4i32 (scalar_to_vector GR32:$src)))))]>; 4225def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4226 "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only 4227 [(set VR128:$dst, (v2i64 (X86vzmovl 4228 (v2i64 (scalar_to_vector GR64:$src)))))]>; 4229} 4230 4231let AddedComplexity = 20 in { 4232def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4233 "movd\t{$src, $dst|$dst, $src}", 4234 [(set VR128:$dst, 4235 (v4i32 (X86vzmovl (v4i32 (scalar_to_vector 4236 (loadi32 addr:$src))))))]>, 4237 VEX; 4238def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4239 "movd\t{$src, $dst|$dst, $src}", 4240 [(set VR128:$dst, 4241 (v4i32 (X86vzmovl (v4i32 (scalar_to_vector 4242 (loadi32 addr:$src))))))]>; 4243} 4244 4245let Predicates = [HasSSE2], AddedComplexity = 20 in { 4246 def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), 4247 (MOVZDI2PDIrm addr:$src)>; 4248 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), 4249 (MOVZDI2PDIrm addr:$src)>; 4250 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), 4251 (MOVZDI2PDIrm addr:$src)>; 4252} 4253 4254let Predicates = [HasAVX] in { 4255 // AVX 128-bit movd/movq 
instructions write zeros in the high 128-bit part. 4256 let AddedComplexity = 20 in { 4257 def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), 4258 (VMOVZDI2PDIrm addr:$src)>; 4259 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), 4260 (VMOVZDI2PDIrm addr:$src)>; 4261 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), 4262 (VMOVZDI2PDIrm addr:$src)>; 4263 } 4264 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 4265 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, 4266 (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))), 4267 (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>; 4268 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, 4269 (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))), 4270 (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; 4271} 4272 4273// These are the correct encodings of the instructions so that we know how to 4274// read correct assembly, even though we continue to emit the wrong ones for 4275// compatibility with Darwin's buggy assembler. 4276def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4277 (MOV64toPQIrr VR128:$dst, GR64:$src), 0>; 4278def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4279 (MOV64toSDrr FR64:$dst, GR64:$src), 0>; 4280def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4281 (MOVPQIto64rr GR64:$dst, VR128:$src), 0>; 4282def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4283 (MOVSDto64rr GR64:$dst, FR64:$src), 0>; 4284def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4285 (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>; 4286def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4287 (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>; 4288 4289//===---------------------------------------------------------------------===// 4290// SSE2 - Move Quadword 4291//===---------------------------------------------------------------------===// 4292 4293//===---------------------------------------------------------------------===// 4294// Move Quadword Int to Packed Quadword Int 4295// 4296def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4297 "vmovq\t{$src, $dst|$dst, $src}", 4298 [(set VR128:$dst, 4299 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, 4300 VEX, Requires<[HasAVX]>; 4301def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4302 "movq\t{$src, $dst|$dst, $src}", 4303 [(set VR128:$dst, 4304 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, 4305 Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix 4306 4307//===---------------------------------------------------------------------===// 4308// Move Packed Quadword Int to Quadword Int 4309// 4310def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4311 "movq\t{$src, $dst|$dst, $src}", 4312 [(store (i64 (vector_extract (v2i64 VR128:$src), 4313 (iPTR 0))), addr:$dst)]>, VEX; 4314def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4315 "movq\t{$src, $dst|$dst, $src}", 4316 [(store (i64 (vector_extract (v2i64 VR128:$src), 4317 (iPTR 0))), addr:$dst)]>; 4318 4319//===---------------------------------------------------------------------===// 4320// Store / copy lower 64-bits of an XMM register.
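// For illustration, the AT&T-syntax form this store matches is e.g.
//   movq %xmm0, (%rdi)   # store xmm0[63:0]; no alignment required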
4321// 4322def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4323 "movq\t{$src, $dst|$dst, $src}", 4324 [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; 4325def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4326 "movq\t{$src, $dst|$dst, $src}", 4327 [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; 4328 4329let AddedComplexity = 20 in 4330def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4331 "vmovq\t{$src, $dst|$dst, $src}", 4332 [(set VR128:$dst, 4333 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector 4334 (loadi64 addr:$src))))))]>, 4335 XS, VEX, Requires<[HasAVX]>; 4336 4337let AddedComplexity = 20 in 4338def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4339 "movq\t{$src, $dst|$dst, $src}", 4340 [(set VR128:$dst, 4341 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector 4342 (loadi64 addr:$src))))))]>, 4343 XS, Requires<[HasSSE2]>; 4344 4345let Predicates = [HasSSE2], AddedComplexity = 20 in { 4346 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), 4347 (MOVZQI2PQIrm addr:$src)>; 4348 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), 4349 (MOVZQI2PQIrm addr:$src)>; 4350 def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; 4351} 4352 4353let Predicates = [HasAVX], AddedComplexity = 20 in { 4354 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), 4355 (VMOVZQI2PQIrm addr:$src)>; 4356 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), 4357 (VMOVZQI2PQIrm addr:$src)>; 4358 def : Pat<(v2i64 (X86vzload addr:$src)), 4359 (VMOVZQI2PQIrm addr:$src)>; 4360} 4361 4362//===---------------------------------------------------------------------===// 4363// Moving from XMM to XMM, clearing the upper 64 bits. Note that there is a 4364// bug in the IA-32 documentation: movq xmm1, xmm2 does clear the high bits.
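// (A C-level sketch of the same zero-extending behavior; the intrinsic name
// is the usual <emmintrin.h> one and is an assumption here, not something
// defined in this file:
//   __m128i r = _mm_move_epi64(a);  // r[63:0] = a[63:0], r[127:64] = 0.)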
4365// 4366let AddedComplexity = 15 in 4367def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4368 "vmovq\t{$src, $dst|$dst, $src}", 4369 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, 4370 XS, VEX, Requires<[HasAVX]>; 4371let AddedComplexity = 15 in 4372def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4373 "movq\t{$src, $dst|$dst, $src}", 4374 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, 4375 XS, Requires<[HasSSE2]>; 4376 4377let AddedComplexity = 20 in 4378def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 4379 "vmovq\t{$src, $dst|$dst, $src}", 4380 [(set VR128:$dst, (v2i64 (X86vzmovl 4381 (loadv2i64 addr:$src))))]>, 4382 XS, VEX, Requires<[HasAVX]>; 4383let AddedComplexity = 20 in { 4384def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 4385 "movq\t{$src, $dst|$dst, $src}", 4386 [(set VR128:$dst, (v2i64 (X86vzmovl 4387 (loadv2i64 addr:$src))))]>, 4388 XS, Requires<[HasSSE2]>; 4389} 4390 4391let AddedComplexity = 20 in { 4392 let Predicates = [HasSSE2] in { 4393 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), 4394 (MOVZPQILo2PQIrm addr:$src)>; 4395 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 4396 (MOVZPQILo2PQIrr VR128:$src)>; 4397 } 4398 let Predicates = [HasAVX] in { 4399 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), 4400 (VMOVZPQILo2PQIrm addr:$src)>; 4401 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 4402 (VMOVZPQILo2PQIrr VR128:$src)>; 4403 } 4404} 4405 4406// Instructions to match in the assembler 4407def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4408 "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; 4409def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4410 "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; 4411// Recognize "movd" with GR64 destination, but encode as a "movq" 4412def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4413 "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; 4414 4415// Instructions for the disassembler 4416// xr = XMM register 4417// xm = mem64 4418 4419let Predicates = [HasAVX] in 4420def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4421 "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; 4422def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4423 "movq\t{$src, $dst|$dst, $src}", []>, XS; 4424 4425//===---------------------------------------------------------------------===// 4426// SSE3 - Conversion Instructions 4427//===---------------------------------------------------------------------===// 4428 4429// Convert Packed Double FP to Packed DW Integers 4430let Predicates = [HasAVX] in { 4431// The assembler can recognize rr 256-bit instructions by seeing a ymm 4432// register, but the same isn't true when using memory operands instead. 4433// Provide other assembly rr and rm forms to address this explicitly. 
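// For example, "vcvtpd2dqx (%rax), %xmm0" unambiguously selects the 128-bit
// memory form defined below, while "vcvtpd2dqy (%rax), %xmm0" selects the
// 256-bit one.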
4434def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4435 "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; 4436def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 4437 "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; 4438 4439// XMM only 4440def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4441 "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; 4442def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 4443 "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; 4444 4445// YMM only 4446def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 4447 "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; 4448def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 4449 "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; 4450} 4451 4452def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 4453 "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; 4454def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4455 "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; 4456 4457def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), 4458 (VCVTPD2DQYrr VR256:$src)>; 4459def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), 4460 (VCVTPD2DQYrm addr:$src)>; 4461 4462// Convert Packed DW Integers to Packed Double FP 4463let Predicates = [HasAVX] in { 4464def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 4465 "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; 4466def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4467 "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; 4468def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), 4469 "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; 4470def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 4471 "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; 4472} 4473 4474def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 4475 "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; 4476def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4477 "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; 4478 4479// AVX 256-bit register conversion intrinsics 4480def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), 4481 (VCVTDQ2PDYrr VR128:$src)>; 4482def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)), 4483 (VCVTDQ2PDYrm addr:$src)>; 4484 4485def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), 4486 (VCVTPD2DQYrr VR256:$src)>; 4487def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), 4488 (VCVTPD2DQYrm addr:$src)>; 4489 4490def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), 4491 (VCVTDQ2PDYrr VR128:$src)>; 4492def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))), 4493 (VCVTDQ2PDYrm addr:$src)>; 4494 4495//===---------------------------------------------------------------------===// 4496// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP 4497//===---------------------------------------------------------------------===// 4498multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, 4499 ValueType vt, RegisterClass RC, PatFrag mem_frag, 4500 X86MemOperand x86memop> { 4501def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 4502 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4503 [(set RC:$dst, (vt (OpNode RC:$src)))]>; 4504def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 4505 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4506 [(set RC:$dst, (OpNode 
(mem_frag addr:$src)))]>; 4507} 4508 4509let Predicates = [HasAVX] in { 4510 defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 4511 v4f32, VR128, memopv4f32, f128mem>, VEX; 4512 defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 4513 v4f32, VR128, memopv4f32, f128mem>, VEX; 4514 defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 4515 v8f32, VR256, memopv8f32, f256mem>, VEX; 4516 defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 4517 v8f32, VR256, memopv8f32, f256mem>, VEX; 4518} 4519defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, 4520 memopv4f32, f128mem>; 4521defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, 4522 memopv4f32, f128mem>; 4523 4524let Predicates = [HasSSE3] in { 4525 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 4526 (MOVSHDUPrr VR128:$src)>; 4527 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), 4528 (MOVSHDUPrm addr:$src)>; 4529 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 4530 (MOVSLDUPrr VR128:$src)>; 4531 def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), 4532 (MOVSLDUPrm addr:$src)>; 4533} 4534 4535let Predicates = [HasAVX] in { 4536 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 4537 (VMOVSHDUPrr VR128:$src)>; 4538 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), 4539 (VMOVSHDUPrm addr:$src)>; 4540 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 4541 (VMOVSLDUPrr VR128:$src)>; 4542 def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), 4543 (VMOVSLDUPrm addr:$src)>; 4544 def : Pat<(v8i32 (X86Movshdup VR256:$src)), 4545 (VMOVSHDUPYrr VR256:$src)>; 4546 def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))), 4547 (VMOVSHDUPYrm addr:$src)>; 4548 def : Pat<(v8i32 (X86Movsldup VR256:$src)), 4549 (VMOVSLDUPYrr VR256:$src)>; 4550 def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))), 4551 (VMOVSLDUPYrm addr:$src)>; 4552} 4553 4554//===---------------------------------------------------------------------===// 4555// SSE3 - Replicate Double FP - MOVDDUP 4556//===---------------------------------------------------------------------===// 4557 4558multiclass sse3_replicate_dfp<string OpcodeStr> { 4559def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4560 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4561 [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; 4562def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 4563 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4564 [(set VR128:$dst, 4565 (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), 4566 (undef))))]>; 4567} 4568 4569// FIXME: Merge with the class above once there are patterns for the ymm version 4570multiclass sse3_replicate_dfp_y<string OpcodeStr> { 4571let Predicates = [HasAVX] in { 4572 def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 4573 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4574 []>; 4575 def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 4576 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4577 []>; 4578 } 4579} 4580 4581defm MOVDDUP : sse3_replicate_dfp<"movddup">; 4582defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; 4583defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; 4584 4585let Predicates = [HasSSE3] in { 4586 def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), 4587 (undef)), 4588 (MOVDDUPrm addr:$src)>; 4589 let AddedComplexity = 5 in { 4590 def : Pat<(movddup
(memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; 4591 def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), 4592 (MOVDDUPrm addr:$src)>; 4593 def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; 4594 def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), 4595 (MOVDDUPrm addr:$src)>; 4596 } 4597 def : Pat<(X86Movddup (memopv2f64 addr:$src)), 4598 (MOVDDUPrm addr:$src)>; 4599 def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), 4600 (MOVDDUPrm addr:$src)>; 4601 def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), 4602 (MOVDDUPrm addr:$src)>; 4603 def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), 4604 (MOVDDUPrm addr:$src)>; 4605 def : Pat<(X86Movddup (bc_v2f64 4606 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), 4607 (MOVDDUPrm addr:$src)>; 4608} 4609 4610let Predicates = [HasAVX] in { 4611 def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), 4612 (undef)), 4613 (VMOVDDUPrm addr:$src)>; 4614 let AddedComplexity = 5 in { 4615 def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>; 4616 def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), 4617 (VMOVDDUPrm addr:$src)>; 4618 def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>; 4619 def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), 4620 (VMOVDDUPrm addr:$src)>; 4621 } 4622 def : Pat<(X86Movddup (memopv2f64 addr:$src)), 4623 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 4624 def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), 4625 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 4626 def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), 4627 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 4628 def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), 4629 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 4630 def : Pat<(X86Movddup (bc_v2f64 4631 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), 4632 (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; 4633 4634 // 256-bit version 4635 def : Pat<(X86Movddup (memopv4f64 addr:$src)), 4636 (VMOVDDUPYrm addr:$src)>; 4637 def : Pat<(X86Movddup (memopv4i64 addr:$src)), 4638 (VMOVDDUPYrm addr:$src)>; 4639 def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))), 4640 (VMOVDDUPYrm addr:$src)>; 4641 def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))), 4642 (VMOVDDUPYrm addr:$src)>; 4643 def : Pat<(X86Movddup (v4f64 VR256:$src)), 4644 (VMOVDDUPYrr VR256:$src)>; 4645 def : Pat<(X86Movddup (v4i64 VR256:$src)), 4646 (VMOVDDUPYrr VR256:$src)>; 4647} 4648 4649//===---------------------------------------------------------------------===// 4650// SSE3 - Move Unaligned Integer 4651//===---------------------------------------------------------------------===// 4652 4653let Predicates = [HasAVX] in { 4654 def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 4655 "vlddqu\t{$src, $dst|$dst, $src}", 4656 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; 4657 def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 4658 "vlddqu\t{$src, $dst|$dst, $src}", 4659 [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX; 4660} 4661def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 4662 "lddqu\t{$src, $dst|$dst, $src}", 4663 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; 4664 4665//===---------------------------------------------------------------------===// 4666// SSE3 - Arithmetic 
4667//===---------------------------------------------------------------------===// 4668 4669multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, 4670 X86MemOperand x86memop, bit Is2Addr = 1> { 4671 def rr : I<0xD0, MRMSrcReg, 4672 (outs RC:$dst), (ins RC:$src1, RC:$src2), 4673 !if(Is2Addr, 4674 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4675 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4676 [(set RC:$dst, (Int RC:$src1, RC:$src2))]>; 4677 def rm : I<0xD0, MRMSrcMem, 4678 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 4679 !if(Is2Addr, 4680 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4681 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4682 [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>; 4683} 4684 4685let Predicates = [HasAVX], 4686 ExeDomain = SSEPackedDouble in { 4687 defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, 4688 f128mem, 0>, TB, XD, VEX_4V; 4689 defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, 4690 f128mem, 0>, TB, OpSize, VEX_4V; 4691 defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, 4692 f256mem, 0>, TB, XD, VEX_4V; 4693 defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, 4694 f256mem, 0>, TB, OpSize, VEX_4V; 4695} 4696let Constraints = "$src1 = $dst", Predicates = [HasSSE3], 4697 ExeDomain = SSEPackedDouble in { 4698 defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, 4699 f128mem>, TB, XD; 4700 defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, 4701 f128mem>, TB, OpSize; 4702} 4703 4704//===---------------------------------------------------------------------===// 4705// SSE3 Instructions 4706//===---------------------------------------------------------------------===// 4707 4708// Horizontal ops 4709multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 4710 X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { 4711 def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 4712 !if(Is2Addr, 4713 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4714 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4715 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>; 4716 4717 def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 4718 !if(Is2Addr, 4719 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4720 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4721 [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>; 4722} 4723multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 4724 X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { 4725 def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 4726 !if(Is2Addr, 4727 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4728 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4729 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>; 4730 4731 def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 4732 !if(Is2Addr, 4733 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4734 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4735 [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>; 4736} 4737 4738let Predicates = [HasAVX] in { 4739 defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, 4740 X86fhadd, 0>, VEX_4V; 4741 defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, 
4742 X86fhadd, 0>, VEX_4V; 4743 defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, 4744 X86fhsub, 0>, VEX_4V; 4745 defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, 4746 X86fhsub, 0>, VEX_4V; 4747 defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, 4748 X86fhadd, 0>, VEX_4V; 4749 defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, 4750 X86fhadd, 0>, VEX_4V; 4751 defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, 4752 X86fhsub, 0>, VEX_4V; 4753 defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, 4754 X86fhsub, 0>, VEX_4V; 4755} 4756 4757let Constraints = "$src1 = $dst" in { 4758 defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; 4759 defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; 4760 defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; 4761 defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; 4762} 4763 4764//===---------------------------------------------------------------------===// 4765// SSSE3 - Packed Absolute Instructions 4766//===---------------------------------------------------------------------===// 4767 4768 4769/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 4770multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, 4771 PatFrag mem_frag128, Intrinsic IntId128> { 4772 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 4773 (ins VR128:$src), 4774 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4775 [(set VR128:$dst, (IntId128 VR128:$src))]>, 4776 OpSize; 4777 4778 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 4779 (ins i128mem:$src), 4780 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4781 [(set VR128:$dst, 4782 (IntId128 4783 (bitconvert (mem_frag128 addr:$src))))]>, OpSize; 4784} 4785 4786let Predicates = [HasAVX] in { 4787 defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, 4788 int_x86_ssse3_pabs_b_128>, VEX; 4789 defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16, 4790 int_x86_ssse3_pabs_w_128>, VEX; 4791 defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32, 4792 int_x86_ssse3_pabs_d_128>, VEX; 4793} 4794 4795defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, 4796 int_x86_ssse3_pabs_b_128>; 4797defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, 4798 int_x86_ssse3_pabs_w_128>; 4799defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, 4800 int_x86_ssse3_pabs_d_128>; 4801 4802//===---------------------------------------------------------------------===// 4803// SSSE3 - Packed Binary Operator Instructions 4804//===---------------------------------------------------------------------===// 4805 4806/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
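/// For example, the instantiation used further below,
///   defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
///                                   int_x86_ssse3_psign_b_128>;
/// expands to PSIGNBrr128 (reg, reg) and PSIGNBrm128 (reg, mem).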
4807multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, 4808 PatFrag mem_frag128, Intrinsic IntId128, 4809 bit Is2Addr = 1> { 4810 let isCommutable = 1 in 4811 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 4812 (ins VR128:$src1, VR128:$src2), 4813 !if(Is2Addr, 4814 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4815 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4816 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, 4817 OpSize; 4818 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 4819 (ins VR128:$src1, i128mem:$src2), 4820 !if(Is2Addr, 4821 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4822 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4823 [(set VR128:$dst, 4824 (IntId128 VR128:$src1, 4825 (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; 4826} 4827 4828let ImmT = NoImm, Predicates = [HasAVX] in { 4829let isCommutable = 0 in { 4830 defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, 4831 int_x86_ssse3_phadd_w_128, 0>, VEX_4V; 4832 defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32, 4833 int_x86_ssse3_phadd_d_128, 0>, VEX_4V; 4834 defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16, 4835 int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; 4836 defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16, 4837 int_x86_ssse3_phsub_w_128, 0>, VEX_4V; 4838 defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32, 4839 int_x86_ssse3_phsub_d_128, 0>, VEX_4V; 4840 defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16, 4841 int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; 4842 defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8, 4843 int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; 4844 defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8, 4845 int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; 4846 defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8, 4847 int_x86_ssse3_psign_b_128, 0>, VEX_4V; 4848 defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16, 4849 int_x86_ssse3_psign_w_128, 0>, VEX_4V; 4850 defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32, 4851 int_x86_ssse3_psign_d_128, 0>, VEX_4V; 4852} 4853defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, 4854 int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; 4855} 4856 4857// None of these have i8 immediate fields. 
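// (ImmT = NoImm below records that these encodings carry no trailing
// immediate byte, unlike e.g. PALIGNR further down, which takes an
// i8imm operand.)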
4858let ImmT = NoImm, Constraints = "$src1 = $dst" in { 4859let isCommutable = 0 in { 4860 defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16, 4861 int_x86_ssse3_phadd_w_128>; 4862 defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32, 4863 int_x86_ssse3_phadd_d_128>; 4864 defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16, 4865 int_x86_ssse3_phadd_sw_128>; 4866 defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16, 4867 int_x86_ssse3_phsub_w_128>; 4868 defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32, 4869 int_x86_ssse3_phsub_d_128>; 4870 defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16, 4871 int_x86_ssse3_phsub_sw_128>; 4872 defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8, 4873 int_x86_ssse3_pmadd_ub_sw_128>; 4874 defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8, 4875 int_x86_ssse3_pshuf_b_128>; 4876 defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8, 4877 int_x86_ssse3_psign_b_128>; 4878 defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16, 4879 int_x86_ssse3_psign_w_128>; 4880 defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32, 4881 int_x86_ssse3_psign_d_128>; 4882} 4883defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, 4884 int_x86_ssse3_pmul_hr_sw_128>; 4885} 4886 4887let Predicates = [HasSSSE3] in { 4888 def : Pat<(X86pshufb VR128:$src, VR128:$mask), 4889 (PSHUFBrr128 VR128:$src, VR128:$mask)>; 4890 def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), 4891 (PSHUFBrm128 VR128:$src, addr:$mask)>; 4892 4893 def : Pat<(X86psignb VR128:$src1, VR128:$src2), 4894 (PSIGNBrr128 VR128:$src1, VR128:$src2)>; 4895 def : Pat<(X86psignw VR128:$src1, VR128:$src2), 4896 (PSIGNWrr128 VR128:$src1, VR128:$src2)>; 4897 def : Pat<(X86psignd VR128:$src1, VR128:$src2), 4898 (PSIGNDrr128 VR128:$src1, VR128:$src2)>; 4899} 4900 4901let Predicates = [HasAVX] in { 4902 def : Pat<(X86pshufb VR128:$src, VR128:$mask), 4903 (VPSHUFBrr128 VR128:$src, VR128:$mask)>; 4904 def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), 4905 (VPSHUFBrm128 VR128:$src, addr:$mask)>; 4906 4907 def : Pat<(X86psignb VR128:$src1, VR128:$src2), 4908 (VPSIGNBrr128 VR128:$src1, VR128:$src2)>; 4909 def : Pat<(X86psignw VR128:$src1, VR128:$src2), 4910 (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; 4911 def : Pat<(X86psignd VR128:$src1, VR128:$src2), 4912 (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; 4913} 4914 4915//===---------------------------------------------------------------------===// 4916// SSSE3 - Packed Align Instruction Patterns 4917//===---------------------------------------------------------------------===// 4918 4919multiclass ssse3_palign<string asm, bit Is2Addr = 1> { 4920 def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), 4921 (ins VR128:$src1, VR128:$src2, i8imm:$src3), 4922 !if(Is2Addr, 4923 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 4924 !strconcat(asm, 4925 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 4926 []>, OpSize; 4927 def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), 4928 (ins VR128:$src1, i128mem:$src2, i8imm:$src3), 4929 !if(Is2Addr, 4930 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 4931 !strconcat(asm, 4932 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 4933 []>, OpSize; 4934} 4935 4936let Predicates = [HasAVX] in 4937 defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; 4938let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in 4939 defm PALIGN : ssse3_palign<"palignr">; 4940 4941let Predicates = 
[HasSSSE3] in { 4942def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), 4943 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 4944def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), 4945 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 4946def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), 4947 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 4948def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), 4949 (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 4950} 4951 4952let Predicates = [HasAVX] in { 4953def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), 4954 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 4955def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), 4956 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 4957def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), 4958 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 4959def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), 4960 (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; 4961} 4962 4963//===---------------------------------------------------------------------===// 4964// SSE3 - Thread synchronization (MONITOR/MWAIT) 4965//===---------------------------------------------------------------------===// 4966 4967let usesCustomInserter = 1 in { 4968def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), 4969 [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>; 4970def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), 4971 [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>; 4972} 4973 4974let Uses = [EAX, ECX, EDX] in 4975def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB, 4976 Requires<[HasSSE3]>; 4977let Uses = [ECX, EAX] in 4978def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB, 4979 Requires<[HasSSE3]>; 4980 4981def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; 4982def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; 4983 4984def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>, 4985 Requires<[In32BitMode]>; 4986def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>, 4987 Requires<[In64BitMode]>; 4988 4989//===----------------------------------------------------------------------===// 4990// SSE4.1 - Packed Move with Sign/Zero Extend 4991//===----------------------------------------------------------------------===// 4992 4993multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> { 4994 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4995 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4996 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize; 4997 4998 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4999 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5000 [(set VR128:$dst, 5001 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>, 5002 OpSize; 5003} 5004 5005let Predicates = [HasAVX] in { 5006defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, 5007 VEX; 5008defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, 5009 VEX; 5010defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>, 5011 VEX; 5012defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, 5013 VEX; 5014defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, 5015 VEX; 5016defm VPMOVZXDQ :
//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//

let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
                [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
                [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
}

let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
                 Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
              Requires<[HasSSE3]>;

def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;

def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
      Requires<[In32BitMode]>;
def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
      Requires<[In64BitMode]>;
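// Illustrative only: the fixed-register operands above mirror the C-level
// usage, assuming the SSE3 intrinsics in <pmmintrin.h> (helper name is ours):
//
//   #include <pmmintrin.h>
//   void wait_on(volatile int *flag) {
//     _mm_monitor((const void *)flag, 0, 0); // address->EAX, ext->ECX, hints->EDX
//     if (*flag == 0)
//       _mm_mwait(0, 0);                     // ext->ECX, hints->EAX
//   }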
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//

multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;

  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
       [(set VR128:$dst,
         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
       OpSize;
}

let Predicates = [HasAVX] in {
defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
                                     VEX;
defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
                                     VEX;
defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
                                     VEX;
defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
                                     VEX;
defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
                                     VEX;
defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
                                     VEX;
}

defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;

let Predicates = [HasSSE41] in {
  // Common patterns involving scalar load.
  def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
            (PMOVSXBWrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
            (PMOVSXBWrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
            (PMOVSXWDrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
            (PMOVSXWDrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
            (PMOVSXDQrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
            (PMOVSXDQrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
            (PMOVZXBWrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
            (PMOVZXBWrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
            (PMOVZXWDrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
            (PMOVZXWDrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
            (PMOVZXDQrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
            (PMOVZXDQrm addr:$src)>;
}

let Predicates = [HasAVX] in {
  // Common patterns involving scalar load.
  def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
            (VPMOVSXBWrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
            (VPMOVSXBWrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
            (VPMOVSXWDrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
            (VPMOVSXWDrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
            (VPMOVSXDQrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
            (VPMOVSXDQrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
            (VPMOVZXBWrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
            (VPMOVZXBWrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
            (VPMOVZXWDrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
            (VPMOVZXWDrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
            (VPMOVZXDQrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
            (VPMOVZXDQrm addr:$src)>;
}
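// Illustrative only: a hedged C sketch of the sign/zero extension these
// definitions provide, assuming the SSE4.1 intrinsics in <smmintrin.h>
// (helper name is ours):
//
//   #include <smmintrin.h>
//   __m128i widen(__m128i bytes) {
//     __m128i s = _mm_cvtepi8_epi16(bytes);  // pmovsxbw: low 8 i8 -> 8 i16
//     __m128i u = _mm_cvtepu8_epi16(bytes);  // pmovzxbw: zero extension
//     return _mm_add_epi16(s, u);
//   }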
multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;

  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
       [(set VR128:$dst,
         (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
       OpSize;
}

let Predicates = [HasAVX] in {
defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
                                     VEX;
defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
                                     VEX;
defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
                                     VEX;
defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
                                     VEX;
}

defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;

let Predicates = [HasSSE41] in {
  // Common patterns involving scalar load
  def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
            (PMOVSXBDrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
            (PMOVSXWQrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
            (PMOVZXBDrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
            (PMOVZXWQrm addr:$src)>;
}

let Predicates = [HasAVX] in {
  // Common patterns involving scalar load
  def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
            (VPMOVSXBDrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
            (VPMOVSXWQrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
            (VPMOVZXBDrm addr:$src)>;
  def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
            (VPMOVZXWQrm addr:$src)>;
}

multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;

  // Expecting an i16 load any-extended to an i32 value.
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId (bitconvert
                     (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
                 OpSize;
}

let Predicates = [HasAVX] in {
defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
                                     VEX;
defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
                                     VEX;
}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;

let Predicates = [HasSSE41] in {
  // Common patterns involving scalar load
  def : Pat<(int_x86_sse41_pmovsxbq
              (bitconvert (v4i32 (X86vzmovl
                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (PMOVSXBQrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxbq
              (bitconvert (v4i32 (X86vzmovl
                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (PMOVZXBQrm addr:$src)>;
}

let Predicates = [HasAVX] in {
  // Common patterns involving scalar load
  def : Pat<(int_x86_sse41_pmovsxbq
              (bitconvert (v4i32 (X86vzmovl
                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (VPMOVSXBQrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxbq
              (bitconvert (v4i32 (X86vzmovl
                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (VPMOVZXBQrm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
                 OpSize;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 []>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}

let Predicates = [HasAVX] in {
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
  def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
         (ins VR128:$src1, i32i8imm:$src2),
         "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
}

defm PEXTRB : SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 []>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}

let Predicates = [HasAVX] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst,
                   (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                          addr:$dst)]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR64:$dst,
                   (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                          addr:$dst)]>, OpSize, REX_W;
}

let Predicates = [HasAVX] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst,
                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
                 OpSize;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
                          addr:$dst)]>, OpSize;
}

let Predicates = [HasAVX] in {
  defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
  def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
                  (ins VR128:$src1, i32i8imm:$src2),
                  "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>, OpSize, VEX;
}
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;

// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                              imm:$src2))),
                 addr:$dst),
          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
          Requires<[HasSSE41]>;
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                              imm:$src2))),
                 addr:$dst),
          (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
          Requires<[HasAVX]>;
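// Illustrative only: a hedged C sketch of the extract forms, assuming the
// SSE4.1 intrinsics in <smmintrin.h> (helper names are ours):
//
//   #include <smmintrin.h>
//   int lane2(__m128i v) { return _mm_extract_epi32(v, 2); } // pextrd
//   int bits1(__m128 v)  { return _mm_extract_ps(v, 1); }    // extractps:
//   // returns the raw bit pattern of float element 1, matching the
//   // bitconvert in the store patterns above.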
//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//

multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
                   imm:$src3))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;

multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
                          imm:$src3)))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;

multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
                          imm:$src3)))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// INSERTPS has a few different modes. The first two forms below are
// optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and may zero arbitrary elements
// in the target vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insrtps VR128:$src1,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                    imm:$src3))]>, OpSize;
}

let Constraints = "$src1 = $dst" in
  defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
let Predicates = [HasAVX] in
  defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;

def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
          (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
          Requires<[HasAVX]>;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
          (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
          Requires<[HasSSE41]>;
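// Illustrative only: the INSERTPS immediate packs a source lane, a
// destination lane, and a zero mask. A hedged C sketch, assuming the SSE4.1
// intrinsics in <smmintrin.h> (helper name is ours):
//
//   #include <smmintrin.h>
//   __m128 put_b0_into_a2(__m128 a, __m128 b) {
//     // imm[7:6] = source element (0), imm[5:4] = dest element (2),
//     // imm[3:0] = zero mask (none) => imm = 0x20.
//     return _mm_insert_ps(a, b, 0x20);
//   }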
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag32, PatFrag mem_frag64,
                            Intrinsic V4F32Int, Intrinsic V2F64Int> {
  // Vector intrinsic operation, reg
  def PSr : SS4AIi8<opcps, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
                    OpSize;

  // Vector intrinsic operation, mem
  def PSm : Ii8<opcps, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
                    TA, OpSize,
                    Requires<[HasSSE41]>;

  // Vector intrinsic operation, reg
  def PDr : SS4AIi8<opcpd, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
                    OpSize;

  // Vector intrinsic operation, mem
  def PDm : SS4AIi8<opcpd, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
                    OpSize;
}

multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
                   RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
  // Vector intrinsic operation, reg
  def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
                        (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                        !strconcat(OpcodeStr,
                        "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                        []>, OpSize;

  // Vector intrinsic operation, mem
  def PSm_AVX : Ii8<opcps, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, TA, OpSize, Requires<[HasSSE41]>;

  // Vector intrinsic operation, reg
  def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
                        (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                        !strconcat(OpcodeStr,
                        "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                        []>, OpSize;

  // Vector intrinsic operation, mem
  def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
                        (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                        !strconcat(OpcodeStr,
                        "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                        []>, OpSize;
}

multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                             string OpcodeStr,
                             Intrinsic F32Int,
                             Intrinsic F64Int, bit Is2Addr = 1> {
  // Intrinsic operation, reg.
  def SSr : SS4AIi8<opcss, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
        OpSize;

  // Intrinsic operation, mem.
  def SSm : SS4AIi8<opcss, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
             (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
        OpSize;

  // Intrinsic operation, reg.
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
        OpSize;

  // Intrinsic operation, mem.
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
             (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
        OpSize;
}

multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
                                   string OpcodeStr> {
  // Intrinsic operation, reg.
  def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
        !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, OpSize;

  // Intrinsic operation, mem.
  def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
        !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, OpSize;

  // Intrinsic operation, reg.
  def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
        !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, OpSize;

  // Intrinsic operation, mem.
  def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
        !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        []>, OpSize;
}

// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX] in {
  // Intrinsic form
  defm VROUND  : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
                                  memopv4f32, memopv2f64,
                                  int_x86_sse41_round_ps,
                                  int_x86_sse41_round_pd>, VEX;
  defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
                                  memopv8f32, memopv4f64,
                                  int_x86_avx_round_ps_256,
                                  int_x86_avx_round_pd_256>, VEX;
  defm VROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
                                   int_x86_sse41_round_ss,
                                   int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;

  // Instructions for the assembler
  defm VROUND  : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
                 VEX;
  defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
                 VEX;
  defm VROUND  : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}

defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
                               memopv4f32, memopv2f64,
                               int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
let Constraints = "$src1 = $dst" in
defm ROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "round",
                                int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
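// Illustrative only: a hedged C sketch of the rounding-mode immediate these
// definitions take, assuming the SSE4.1 intrinsics in <smmintrin.h> (helper
// name is ours):
//
//   #include <smmintrin.h>
//   __m128 floor4(__m128 v) {
//     return _mm_round_ps(v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
//   }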
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//

// PTEST - we lower to this in X86ISelLowering, primarily from the Intel
// intrinsic that corresponds to it.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
                OpSize, VEX;
def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
                OpSize, VEX;

def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                OpSize, VEX;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
                OpSize, VEX;
}

let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
              "ptest \t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
              OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
              "ptest \t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
              OpSize;
}

// The bit test instructions below are AVX-only.
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
            OpSize, VEX;
}

let Defs = [EFLAGS], Predicates = [HasAVX] in {
defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
}
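// Illustrative only: PTEST sets ZF from the AND of its operands (and CF from
// the ANDN). A hedged C sketch, assuming the SSE4.1 intrinsics in
// <smmintrin.h> (helper name is ours):
//
//   #include <smmintrin.h>
//   int all_zero_under_mask(__m128i v, __m128i mask) {
//     return _mm_testz_si128(v, mask);  // ZF: (v & mask) == 0
//   }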
//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//

let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
                     OpSize, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)]>, OpSize, XS;

  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
                     XS;
  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)]>, XS;

  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
                      XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)]>, XS;
}
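// Illustrative only: a hedged C sketch of the POPCNT intrinsic these
// definitions back, assuming <nmmintrin.h> (helper name is ours):
//
//   #include <nmmintrin.h>
//   int bits_set(unsigned v) { return _mm_popcnt_u32(v); }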
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId128> {
  def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
  def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}

let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
                                          int_x86_sse41_phminposuw>, VEX;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
                                         int_x86_sse41_phminposuw>;
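// Illustrative only: a hedged C sketch of PHMINPOSUW, assuming the SSE4.1
// intrinsics in <smmintrin.h> (helper name is ours):
//
//   #include <smmintrin.h>
//   __m128i min_and_index(__m128i words) {
//     // Element 0 of the result holds the minimum u16, element 1 its index;
//     // the remaining elements are zeroed.
//     return _mm_minpos_epu16(words);
//   }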
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId128, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1,
          (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}

let Predicates = [HasAVX] in {
  let isCommutable = 0 in
  defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
                                      0>, VEX_4V;
  defm VPCMPEQQ  : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
                                      0>, VEX_4V;
  defm VPMINSB   : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
                                      0>, VEX_4V;
  defm VPMINSD   : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
                                      0>, VEX_4V;
  defm VPMINUD   : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
                                      0>, VEX_4V;
  defm VPMINUW   : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
                                      0>, VEX_4V;
  defm VPMAXSB   : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
                                      0>, VEX_4V;
  defm VPMAXSD   : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
                                      0>, VEX_4V;
  defm VPMAXUD   : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
                                      0>, VEX_4V;
  defm VPMAXUW   : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
                                      0>, VEX_4V;
  defm VPMULDQ   : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
                                      0>, VEX_4V;

  def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
            (VPCMPEQQrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
            (VPCMPEQQrm VR128:$src1, addr:$src2)>;
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in
  defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
  defm PCMPEQQ  : SS41I_binop_rm_int<0x29, "pcmpeqq",  int_x86_sse41_pcmpeqq>;
  defm PMINSB   : SS41I_binop_rm_int<0x38, "pminsb",   int_x86_sse41_pminsb>;
  defm PMINSD   : SS41I_binop_rm_int<0x39, "pminsd",   int_x86_sse41_pminsd>;
  defm PMINUD   : SS41I_binop_rm_int<0x3B, "pminud",   int_x86_sse41_pminud>;
  defm PMINUW   : SS41I_binop_rm_int<0x3A, "pminuw",   int_x86_sse41_pminuw>;
  defm PMAXSB   : SS41I_binop_rm_int<0x3C, "pmaxsb",   int_x86_sse41_pmaxsb>;
  defm PMAXSD   : SS41I_binop_rm_int<0x3D, "pmaxsd",   int_x86_sse41_pmaxsd>;
  defm PMAXUD   : SS41I_binop_rm_int<0x3F, "pmaxud",   int_x86_sse41_pmaxud>;
  defm PMAXUW   : SS41I_binop_rm_int<0x3E, "pmaxuw",   int_x86_sse41_pmaxuw>;
  defm PMULDQ   : SS41I_binop_rm_int<0x28, "pmuldq",   int_x86_sse41_pmuldq>;
}

def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
          (PCMPEQQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
          (PCMPEQQrm VR128:$src1, addr:$src2)>;
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
       OpSize;
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (OpNode VR128:$src1,
                                 (bc_v4i32 (memopv2i64 addr:$src2))))]>,
       OpSize;
}

let Predicates = [HasAVX] in
  defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                 X86MemOperand x86memop, bit Is2Addr = 1> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
        OpSize;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (IntId RC:$src1,
           (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
        OpSize;
}

let Predicates = [HasAVX] in {
  let isCommutable = 0 in {
  defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
                                      VR128, memopv16i8, i128mem, 0>, VEX_4V;
  defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
                                      VR128, memopv16i8, i128mem, 0>, VEX_4V;
  defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
            int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
  defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
            int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
  defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
                                      VR128, memopv16i8, i128mem, 0>, VEX_4V;
  defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                      VR128, memopv16i8, i128mem, 0>, VEX_4V;
  }
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, memopv16i8, i128mem, 0>, VEX_4V;
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, memopv16i8, i128mem, 0>, VEX_4V;
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, memopv32i8, i256mem, 0>, VEX_4V;
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
  defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
                                     VR128, memopv16i8, i128mem>;
  defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
                                     VR128, memopv16i8, i128mem>;
  defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
                                     VR128, memopv16i8, i128mem>;
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memopv16i8, i128mem>;
  }
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memopv16i8, i128mem>;
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memopv16i8, i128mem>;
}
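// Illustrative only: DPPS's 8-bit immediate selects which lanes enter the dot
// product (high nibble) and which receive it (low nibble). A hedged C sketch,
// assuming the SSE4.1 intrinsics in <smmintrin.h> (helper name is ours):
//
//   #include <smmintrin.h>
//   float dot3(__m128 a, __m128 b) {
//     // Multiply lanes 0-2 only, and place the sum in lane 0 (imm = 0x71).
//     return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x71));
//   }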
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
let Predicates = [HasAVX] in {
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                                    RegisterClass RC, X86MemOperand x86memop,
                                    PatFrag mem_frag, Intrinsic IntId> {
  def rr : I<opc, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2, RC:$src3),
             !strconcat(OpcodeStr,
               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
             [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
             SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;

  def rm : I<opc, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop:$src2, RC:$src3),
             !strconcat(OpcodeStr,
               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
             [(set RC:$dst,
               (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
                      RC:$src3))],
             SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
}

defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
                                           memopv16i8, int_x86_sse41_blendvpd>;
defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
                                           memopv16i8, int_x86_sse41_blendvps>;
defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
                                           memopv16i8, int_x86_sse41_pblendvb>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
                                         memopv32i8, int_x86_avx_blendv_pd_256>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
                                         memopv32i8, int_x86_avx_blendv_ps_256>;

let Predicates = [HasAVX] in {
  def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                            (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
                            (v8f32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                            (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                            (v4f64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}
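// Illustrative only: the vselect patterns above rely on BLENDV taking the
// element it selects from its second source when the mask's sign bit is set,
// which is why the operands are swapped. A hedged C sketch, assuming the
// SSE4.1 intrinsics in <smmintrin.h> (helper name is ours):
//
//   #include <smmintrin.h>
//   __m128 select_lanes(__m128 mask, __m128 a, __m128 b) {
//     // Lanes whose mask sign bit is set take b; the rest take a.
//     return _mm_blendv_ps(a, b, mask);
//   }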
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
                    OpSize;

    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst,
                      (IntId VR128:$src1,
                       (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
  }
}

defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;

let Predicates = [HasSSE41] in {
  def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
}

let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
                        OpSize, VEX;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
                       OpSize;

//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//

/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId128, bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
       OpSize;
  def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1,
          (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}

let Predicates = [HasAVX] in {
  defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
                                     0>, VEX_4V;

  def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
            (VPCMPGTQrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
            (VPCMPGTQrm VR128:$src1, addr:$src2)>;
}

let Constraints = "$src1 = $dst" in
  defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;

def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
          (PCMPGTQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
          (PCMPGTQrm VR128:$src1, addr:$src2)>;
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
//===----------------------------------------------------------------------===//

// Packed Compare Implicit Length Strings, Return Mask
multiclass pseudo_pcmpistrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
                                                  imm:$src3))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
                       VR128:$src1, (load addr:$src2), imm:$src3))]>;
}

let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
  defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}

let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
  def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
      "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
  def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
      "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
}

let Defs = [XMM0, EFLAGS] in {
  def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
      "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
  def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
      "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
}

// Packed Compare Explicit Length Strings, Return Mask
multiclass pseudo_pcmpestrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                       VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                       VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
}

let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
  defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}

let Predicates = [HasAVX],
    Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
  def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src3, i8imm:$src5),
      "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
  def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
      "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
}

let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
  def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src3, i8imm:$src5),
      "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
  def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
      "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
}
$src3, $src5}"), 6188 [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), 6189 (implicit EFLAGS)]>, OpSize; 6190 def rm : SS42AI<0x61, MRMSrcMem, (outs), 6191 (ins VR128:$src1, i128mem:$src3, i8imm:$src5), 6192 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 6193 [(set ECX, 6194 (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), 6195 (implicit EFLAGS)]>, OpSize; 6196 } 6197} 6198 6199let Predicates = [HasAVX] in { 6200defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">, 6201 VEX; 6202defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">, 6203 VEX; 6204defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">, 6205 VEX; 6206defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">, 6207 VEX; 6208defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">, 6209 VEX; 6210defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">, 6211 VEX; 6212} 6213 6214defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; 6215defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; 6216defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; 6217defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; 6218defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; 6219defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; 6220 6221//===----------------------------------------------------------------------===// 6222// SSE4.2 - CRC Instructions 6223//===----------------------------------------------------------------------===// 6224 6225// No CRC instructions have AVX equivalents 6226 6227// crc intrinsic instruction 6228// This set of instructions are only rm, the only difference is the size 6229// of r and m. 
//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents.

// CRC intrinsic instructions.
// This set of instructions exists only in rr/rm forms; the only difference
// between variants is the size of r and m.
let Constraints = "$src1 = $dst" in {
  def CRC32r32m8  : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
                      (ins GR32:$src1, i8mem:$src2),
                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                      [(set GR32:$dst,
                        (int_x86_sse42_crc32_32_8 GR32:$src1,
                                                  (load addr:$src2)))]>;
  def CRC32r32r8  : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
                      (ins GR32:$src1, GR8:$src2),
                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                      [(set GR32:$dst,
                        (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
  def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
                      (ins GR32:$src1, i16mem:$src2),
                      "crc32{w} \t{$src2, $src1|$src1, $src2}",
                      [(set GR32:$dst,
                        (int_x86_sse42_crc32_32_16 GR32:$src1,
                                                   (load addr:$src2)))]>,
                      OpSize;
  def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
                      (ins GR32:$src1, GR16:$src2),
                      "crc32{w} \t{$src2, $src1|$src1, $src2}",
                      [(set GR32:$dst,
                        (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
                      OpSize;
  def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
                      (ins GR32:$src1, i32mem:$src2),
                      "crc32{l} \t{$src2, $src1|$src1, $src2}",
                      [(set GR32:$dst,
                        (int_x86_sse42_crc32_32_32 GR32:$src1,
                                                   (load addr:$src2)))]>;
  def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
                      (ins GR32:$src1, GR32:$src2),
                      "crc32{l} \t{$src2, $src1|$src1, $src2}",
                      [(set GR32:$dst,
                        (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
  def CRC32r64m8  : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
                      (ins GR64:$src1, i8mem:$src2),
                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                      [(set GR64:$dst,
                        (int_x86_sse42_crc32_64_8 GR64:$src1,
                                                  (load addr:$src2)))]>,
                      REX_W;
  def CRC32r64r8  : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
                      (ins GR64:$src1, GR8:$src2),
                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                      [(set GR64:$dst,
                        (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
                      REX_W;
  def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
                      (ins GR64:$src1, i64mem:$src2),
                      "crc32{q} \t{$src2, $src1|$src1, $src2}",
                      [(set GR64:$dst,
                        (int_x86_sse42_crc32_64_64 GR64:$src1,
                                                   (load addr:$src2)))]>,
                      REX_W;
  def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
                      (ins GR64:$src1, GR64:$src2),
                      "crc32{q} \t{$src2, $src1|$src1, $src2}",
                      [(set GR64:$dst,
                        (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
                      REX_W;
}
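// Illustrative only: a hedged C sketch of chaining the CRC32 accumulator,
// which is what the "$src1 = $dst" constraint above models; assuming the
// SSE4.2 intrinsics in <nmmintrin.h> (helper name is ours):
//
//   #include <nmmintrin.h>
//   unsigned crc32c(unsigned crc, const unsigned char *p, unsigned n) {
//     for (unsigned i = 0; i < n; ++i)
//       crc = _mm_crc32_u8(crc, p[i]);  // crc32{b}
//     return crc;
//   }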
//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, bit Is2Addr = 1> {
  def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
       OpSize;
  def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1,
          (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}

// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, HasAES] in {
  defm VAESENC     : AESI_binop_rm_int<0xDC, "vaesenc",
                                       int_x86_aesni_aesenc, 0>, VEX_4V;
  defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
                                       int_x86_aesni_aesenclast, 0>, VEX_4V;
  defm VAESDEC     : AESI_binop_rm_int<0xDE, "vaesdec",
                                       int_x86_aesni_aesdec, 0>, VEX_4V;
  defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                       int_x86_aesni_aesdeclast, 0>, VEX_4V;
}

let Constraints = "$src1 = $dst" in {
  defm AESENC      : AESI_binop_rm_int<0xDC, "aesenc",
                                       int_x86_aesni_aesenc>;
  defm AESENCLAST  : AESI_binop_rm_int<0xDD, "aesenclast",
                                       int_x86_aesni_aesenclast>;
  defm AESDEC      : AESI_binop_rm_int<0xDE, "aesdec",
                                       int_x86_aesni_aesdec>;
  defm AESDECLAST  : AESI_binop_rm_int<0xDF, "aesdeclast",
                                       int_x86_aesni_aesdeclast>;
}

let Predicates = [HasAES] in {
  def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
            (AESENCrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
            (AESENCrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
            (AESENCLASTrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
            (AESENCLASTrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
            (AESDECrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
            (AESDECrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
            (AESDECLASTrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
            (AESDECLASTrm VR128:$src1, addr:$src2)>;
}

let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in {
  def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
            (VAESENCrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
            (VAESENCrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
            (VAESENCLASTrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
            (VAESENCLASTrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
            (VAESDECrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
            (VAESDECrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
            (VAESDECLASTrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
            (VAESDECLASTrm VR128:$src1, addr:$src2)>;
}

// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc VR128:$src1))]>,
      OpSize, VEX;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
      OpSize, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
    (ins VR128:$src1),
    "aesimc\t{$src1, $dst|$dst, $src1}",
    [(set VR128:$dst,
      (int_x86_aesni_aesimc VR128:$src1))]>,
    OpSize;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
    (ins i128mem:$src1),
    "aesimc\t{$src1, $dst|$dst, $src1}",
    [(set VR128:$dst,
      (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
    OpSize;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
      OpSize, VEX;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
                                       imm:$src2))]>,
      OpSize, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
    (ins VR128:$src1, i8imm:$src2),
    "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
      (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
    OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
    (ins i128mem:$src1, i8imm:$src2),
    "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
      (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
                                     imm:$src2))]>,
    OpSize;
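// Illustrative only: a hedged C sketch of the encryption flow these round
// instructions implement, assuming the AES-NI intrinsics in <wmmintrin.h>
// and an AES-128 key schedule rk[0..10] prepared by the caller (names are
// ours):
//
//   #include <wmmintrin.h>
//   __m128i encrypt_block(__m128i state, const __m128i rk[11]) {
//     state = _mm_xor_si128(state, rk[0]);           // whitening
//     for (int i = 1; i < 10; ++i)
//       state = _mm_aesenc_si128(state, rk[i]);      // one full round
//     return _mm_aesenclast_si128(state, rk[10]);    // final round
//   }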
// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc VR128:$src1))]>,
      OpSize, VEX;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
      OpSize, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst,
    (int_x86_aesni_aesimc VR128:$src1))]>,
  OpSize;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1),
  "aesimc\t{$src1, $dst|$dst, $src1}",
  [(set VR128:$dst,
    (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
  OpSize;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
      OpSize, VEX;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
                                       imm:$src2))]>,
      OpSize, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1, i8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
  OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
  (ins i128mem:$src1, i8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
  [(set VR128:$dst,
    (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
                                   imm:$src2))]>,
  OpSize;

//===----------------------------------------------------------------------===//
// CLMUL Instructions
//===----------------------------------------------------------------------===//

// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           []>;

def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           []>;
}

// AVX carry-less Multiplication instructions
def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           []>;

def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           []>;
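// Illustrative usage sketch (not part of this file): pclmulqdq performs a
// carry-less (GF(2)[x]) multiply of one 64-bit quadword from each source;
// imm bit 0 picks the quadword of the first source, bit 4 that of the
// second. Via <wmmintrin.h>, with a and b as placeholder operands:
//
//   __m128i r = _mm_clmulepi64_si128(a, b, 0x10);  // a.lo * b.hi
//
// The definitions above carry no ISel patterns, so they are assembler-level
// here; the pclmul_alias multiclass below adds the pclmul{lq,hq}{lq,hq}dq
// spellings that bake the immediate into the mnemonic.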
multiclass pclmul_alias<string asm, int immop> {
  def : InstAlias<!strconcat("pclmul", asm,
                             "dq {$src, $dst|$dst, $src}"),
                  (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;

  def : InstAlias<!strconcat("pclmul", asm,
                             "dq {$src, $dst|$dst, $src}"),
                  (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;

  def : InstAlias<!strconcat("vpclmul", asm,
                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;

  def : InstAlias<!strconcat("vpclmul", asm,
                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
}
defm : pclmul_alias<"hqhq", 0x11>;
defm : pclmul_alias<"hqlq", 0x01>;
defm : pclmul_alias<"lqhq", 0x10>;
defm : pclmul_alias<"lqlq", 0x00>;

//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
//              destination operand
//
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
                    X86MemOperand x86memop, Intrinsic Int> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (Int addr:$src))]>, VEX;

def VBROADCASTSS   : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
                                   int_x86_avx_vbroadcastss>;
def VBROADCASTSSY  : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
                                   int_x86_avx_vbroadcastss_256>;
def VBROADCASTSD   : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
                                   int_x86_avx_vbroadcast_sd_256>;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
                                   int_x86_avx_vbroadcastf128_pd_256>;

def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
          (VBROADCASTF128 addr:$src)>;

def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSY addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VBROADCASTSD addr:$src)>;
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSY addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSD addr:$src)>;

def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSS addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSS addr:$src)>;
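// Illustrative usage sketch (not part of this file): vbroadcastss/sd load a
// scalar and splat it across every element of the destination. Via
// <immintrin.h>, with x as a placeholder:
//
//   float x = 1.5f;
//   __m256 v = _mm256_broadcast_ss(&x);   // expected to select VBROADCASTSSY
//
// Note that all of the forms defined above are memory-source only
// (MRMSrcMem); a register-source broadcast requires AVX2.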
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V;
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V;

def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
          (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
          (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
          (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;

def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
                                   (i32 imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
                                   (i32 imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                                   (i32 imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                   (i32 imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                                   (i32 imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                   (i32 imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, i8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, VEX;
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, VEX;

def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;

def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v4f32 (VEXTRACTF128rr
                    (v8f32 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v2f64 (VEXTRACTF128rr
                    (v4f64 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v4i32 (VEXTRACTF128rr
                    (v8i32 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v2i64 (VEXTRACTF128rr
                    (v4i64 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v8i16 (VEXTRACTF128rr
                    (v16i16 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v16i8 (VEXTRACTF128rr
                    (v32i8 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
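// Illustrative usage sketch (not part of this file): the immediate selects
// which 128-bit half is inserted or extracted. Via <immintrin.h>, with
// acc/lo as placeholder operands:
//
//   __m256 wide = _mm256_insertf128_ps(acc, lo, 0);   // VINSERTF128rr
//   __m128 hi   = _mm256_extractf128_ps(wide, 1);     // VEXTRACTF128rr
//
// The vinsertf128_insert/vextractf128_extract patterns above additionally
// let generic subvector insert/extract nodes reach these instructions
// without going through the intrinsics.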
//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256,
                          PatFrag pf128, PatFrag pf256> {
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, f128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
             VEX_4V;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, f256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V;
  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
}

defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256,
                                 memopv4f32, memopv8f32>;
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256,
                                 memopv2f64, memopv4f64>;

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
                      Intrinsic IntVar, Intrinsic IntImm> {
  def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop_i:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;

  def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, i8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
  def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
             (ins x86memop_f:$src1, i8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
}

defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                             memopv4f32, memopv4i32,
                             int_x86_avx_vpermilvar_ps,
                             int_x86_avx_vpermil_ps>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
                             memopv8f32, memopv8i32,
                             int_x86_avx_vpermilvar_ps_256,
                             int_x86_avx_vpermil_ps_256>;
defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                             memopv2f64, memopv2i64,
                             int_x86_avx_vpermilvar_pd,
                             int_x86_avx_vpermil_pd>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
                             memopv4f64, memopv4i64,
                             int_x86_avx_vpermilvar_pd_256,
                             int_x86_avx_vpermil_pd_256>;

def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
          (VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
          (VPERMILPDYri VR256:$src1, imm:$imm)>;
def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
          (VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
          (VPERMILPDYri VR256:$src1, imm:$imm)>;
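// Illustrative usage sketch (not part of this file): the masked moves only
// touch lanes whose mask element has its sign bit set, and vpermilps
// reorders elements within each 128-bit half. Via <immintrin.h>, with
// ptr/mask as placeholders (mask is the integer-vector form of the
// intrinsic; header signatures have varied across compiler versions):
//
//   __m256 v = _mm256_maskload_ps(ptr, mask);   // VMASKMOVPSYrm
//   __m256 r = _mm256_permute_ps(v, 0x1B);      // VPERMILPSYri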
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, i8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V;

def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;

def : Pat<(int_x86_avx_vperm2f128_ps_256
                  VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_pd_256
                  VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_si_256
                  VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;

def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;

//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                   [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L,
                   Requires<[HasAVX]>;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>;
}
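// Illustrative usage sketch (not part of this file): vperm2f128 builds the
// result from any two 128-bit halves of its sources; e.g. swapping the
// halves of one vector via <immintrin.h> (v is a placeholder):
//
//   __m256 swapped = _mm256_permute2f128_ps(v, v, 0x01);  // VPERM2F128rr
//
// vzeroupper (_mm256_zeroupper()) is typically emitted before legacy-SSE
// code to avoid AVX-to-SSE transition penalties, which is why both VZERO
// forms clobber all the YMM registers here.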
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 6782 TA, OpSize, VEX; 6783 def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst), 6784 (ins VR256:$src1, i32i8imm:$src2), 6785 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 6786 TA, OpSize, VEX; 6787 def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), 6788 (ins VR256:$src1, i32i8imm:$src2), 6789 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 6790 TA, OpSize, VEX; 6791} 6792