• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "assembler_x86_64.h"
18 
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23 
24 namespace art HIDDEN {
25 namespace x86_64 {
26 
operator <<(std::ostream & os,const CpuRegister & reg)27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
28   return os << reg.AsRegister();
29 }
30 
operator <<(std::ostream & os,const XmmRegister & reg)31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
32   return os << reg.AsFloatRegister();
33 }
34 
operator <<(std::ostream & os,const X87Register & reg)35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
36   return os << "ST" << static_cast<int>(reg);
37 }
38 
operator <<(std::ostream & os,const Address & addr)39 std::ostream& operator<<(std::ostream& os, const Address& addr) {
40   switch (addr.mod()) {
41     case 0:
42       if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
43         return os << "(%" << addr.cpu_rm() << ")";
44       } else if (addr.base() == RBP) {
45         return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
46                   << "," << (1 << addr.scale()) << ")";
47       }
48       return os << "(%" << addr.cpu_base() << ",%"
49                 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
50     case 1:
51       if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
52         return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
53       }
54       return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
55                 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
56     case 2:
57       if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
58         return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
59       }
60       return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
61                 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
62     default:
63       return os << "<address?>";
64   }
65 }
66 
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68   if (has_AVX_ || has_AVX2_) {
69     return true;
70   }
71   return false;
72 }
73 
74 
call(CpuRegister reg)75 void X86_64Assembler::call(CpuRegister reg) {
76   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
77   EmitOptionalRex32(reg);
78   EmitUint8(0xFF);
79   EmitRegisterOperand(2, reg.LowBits());
80 }
81 
82 
call(const Address & address)83 void X86_64Assembler::call(const Address& address) {
84   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
85   EmitOptionalRex32(address);
86   EmitUint8(0xFF);
87   EmitOperand(2, address);
88 }
89 
90 
call(Label * label)91 void X86_64Assembler::call(Label* label) {
92   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
93   EmitUint8(0xE8);
94   static const int kSize = 5;
95   // Offset by one because we already have emitted the opcode.
96   EmitLabel(label, kSize - 1);
97 }
98 
pushq(CpuRegister reg)99 void X86_64Assembler::pushq(CpuRegister reg) {
100   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
101   EmitOptionalRex32(reg);
102   EmitUint8(0x50 + reg.LowBits());
103 }
104 
105 
pushq(const Address & address)106 void X86_64Assembler::pushq(const Address& address) {
107   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
108   EmitOptionalRex32(address);
109   EmitUint8(0xFF);
110   EmitOperand(6, address);
111 }
112 
113 
pushq(const Immediate & imm)114 void X86_64Assembler::pushq(const Immediate& imm) {
115   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
116   CHECK(imm.is_int32());  // pushq only supports 32b immediate.
117   if (imm.is_int8()) {
118     EmitUint8(0x6A);
119     EmitUint8(imm.value() & 0xFF);
120   } else {
121     EmitUint8(0x68);
122     EmitImmediate(imm);
123   }
124 }
125 
126 
popq(CpuRegister reg)127 void X86_64Assembler::popq(CpuRegister reg) {
128   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
129   EmitOptionalRex32(reg);
130   EmitUint8(0x58 + reg.LowBits());
131 }
132 
133 
popq(const Address & address)134 void X86_64Assembler::popq(const Address& address) {
135   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
136   EmitOptionalRex32(address);
137   EmitUint8(0x8F);
138   EmitOperand(0, address);
139 }
140 
141 
movq(CpuRegister dst,const Immediate & imm)142 void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
143   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
144   if (imm.is_int32()) {
145     // 32 bit. Note: sign-extends.
146     EmitRex64(dst);
147     EmitUint8(0xC7);
148     EmitRegisterOperand(0, dst.LowBits());
149     EmitInt32(static_cast<int32_t>(imm.value()));
150   } else {
151     EmitRex64(dst);
152     EmitUint8(0xB8 + dst.LowBits());
153     EmitInt64(imm.value());
154   }
155 }
156 
157 
movl(CpuRegister dst,const Immediate & imm)158 void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
159   CHECK(imm.is_int32());
160   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
161   EmitOptionalRex32(dst);
162   EmitUint8(0xB8 + dst.LowBits());
163   EmitImmediate(imm);
164 }
165 
166 
movq(const Address & dst,const Immediate & imm)167 void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
168   CHECK(imm.is_int32());
169   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
170   EmitRex64(dst);
171   EmitUint8(0xC7);
172   EmitOperand(0, dst);
173   EmitImmediate(imm);
174 }
175 
176 
movq(CpuRegister dst,CpuRegister src)177 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
178   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
179   // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
180   EmitRex64(src, dst);
181   EmitUint8(0x89);
182   EmitRegisterOperand(src.LowBits(), dst.LowBits());
183 }
184 
185 
movl(CpuRegister dst,CpuRegister src)186 void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
187   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
188   EmitOptionalRex32(dst, src);
189   EmitUint8(0x8B);
190   EmitRegisterOperand(dst.LowBits(), src.LowBits());
191 }
192 
193 
movq(CpuRegister dst,const Address & src)194 void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
195   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
196   EmitRex64(dst, src);
197   EmitUint8(0x8B);
198   EmitOperand(dst.LowBits(), src);
199 }
200 
201 
movl(CpuRegister dst,const Address & src)202 void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
203   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
204   EmitOptionalRex32(dst, src);
205   EmitUint8(0x8B);
206   EmitOperand(dst.LowBits(), src);
207 }
208 
209 
movq(const Address & dst,CpuRegister src)210 void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
211   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
212   EmitRex64(src, dst);
213   EmitUint8(0x89);
214   EmitOperand(src.LowBits(), dst);
215 }
216 
217 
movl(const Address & dst,CpuRegister src)218 void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
219   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
220   EmitOptionalRex32(src, dst);
221   EmitUint8(0x89);
222   EmitOperand(src.LowBits(), dst);
223 }
224 
movl(const Address & dst,const Immediate & imm)225 void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
226   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
227   EmitOptionalRex32(dst);
228   EmitUint8(0xC7);
229   EmitOperand(0, dst);
230   EmitImmediate(imm);
231 }
232 
movntl(const Address & dst,CpuRegister src)233 void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
234   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
235   EmitOptionalRex32(src, dst);
236   EmitUint8(0x0F);
237   EmitUint8(0xC3);
238   EmitOperand(src.LowBits(), dst);
239 }
240 
movntq(const Address & dst,CpuRegister src)241 void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
242   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
243   EmitRex64(src, dst);
244   EmitUint8(0x0F);
245   EmitUint8(0xC3);
246   EmitOperand(src.LowBits(), dst);
247 }
248 
cmov(Condition c,CpuRegister dst,CpuRegister src)249 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
250   cmov(c, dst, src, true);
251 }
252 
cmov(Condition c,CpuRegister dst,CpuRegister src,bool is64bit)253 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
254   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
255   EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
256   EmitUint8(0x0F);
257   EmitUint8(0x40 + c);
258   EmitRegisterOperand(dst.LowBits(), src.LowBits());
259 }
260 
261 
cmov(Condition c,CpuRegister dst,const Address & src,bool is64bit)262 void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
263   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
264   if (is64bit) {
265     EmitRex64(dst, src);
266   } else {
267     EmitOptionalRex32(dst, src);
268   }
269   EmitUint8(0x0F);
270   EmitUint8(0x40 + c);
271   EmitOperand(dst.LowBits(), src);
272 }
273 
274 
movzxb(CpuRegister dst,CpuRegister src)275 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
276   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
277   EmitOptionalByteRegNormalizingRex32(dst, src);
278   EmitUint8(0x0F);
279   EmitUint8(0xB6);
280   EmitRegisterOperand(dst.LowBits(), src.LowBits());
281 }
282 
283 
movzxb(CpuRegister dst,const Address & src)284 void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
285   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
286   // Byte register is only in the source register form, so we don't use
287   // EmitOptionalByteRegNormalizingRex32(dst, src);
288   EmitOptionalRex32(dst, src);
289   EmitUint8(0x0F);
290   EmitUint8(0xB6);
291   EmitOperand(dst.LowBits(), src);
292 }
293 
294 
movsxb(CpuRegister dst,CpuRegister src)295 void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
296   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
297   EmitOptionalByteRegNormalizingRex32(dst, src);
298   EmitUint8(0x0F);
299   EmitUint8(0xBE);
300   EmitRegisterOperand(dst.LowBits(), src.LowBits());
301 }
302 
303 
movsxb(CpuRegister dst,const Address & src)304 void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
305   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
306   // Byte register is only in the source register form, so we don't use
307   // EmitOptionalByteRegNormalizingRex32(dst, src);
308   EmitOptionalRex32(dst, src);
309   EmitUint8(0x0F);
310   EmitUint8(0xBE);
311   EmitOperand(dst.LowBits(), src);
312 }
313 
314 
movb(CpuRegister,const Address &)315 void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
316   LOG(FATAL) << "Use movzxb or movsxb instead.";
317 }
318 
319 
movb(const Address & dst,CpuRegister src)320 void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
321   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
322   EmitOptionalByteRegNormalizingRex32(src, dst);
323   EmitUint8(0x88);
324   EmitOperand(src.LowBits(), dst);
325 }
326 
327 
movb(const Address & dst,const Immediate & imm)328 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
329   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
330   EmitOptionalRex32(dst);
331   EmitUint8(0xC6);
332   EmitOperand(Register::RAX, dst);
333   CHECK(imm.is_int8());
334   EmitUint8(imm.value() & 0xFF);
335 }
336 
337 
movzxw(CpuRegister dst,CpuRegister src)338 void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
339   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
340   EmitOptionalRex32(dst, src);
341   EmitUint8(0x0F);
342   EmitUint8(0xB7);
343   EmitRegisterOperand(dst.LowBits(), src.LowBits());
344 }
345 
346 
movzxw(CpuRegister dst,const Address & src)347 void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
348   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
349   EmitOptionalRex32(dst, src);
350   EmitUint8(0x0F);
351   EmitUint8(0xB7);
352   EmitOperand(dst.LowBits(), src);
353 }
354 
355 
movsxw(CpuRegister dst,CpuRegister src)356 void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
357   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
358   EmitOptionalRex32(dst, src);
359   EmitUint8(0x0F);
360   EmitUint8(0xBF);
361   EmitRegisterOperand(dst.LowBits(), src.LowBits());
362 }
363 
364 
movsxw(CpuRegister dst,const Address & src)365 void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
366   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
367   EmitOptionalRex32(dst, src);
368   EmitUint8(0x0F);
369   EmitUint8(0xBF);
370   EmitOperand(dst.LowBits(), src);
371 }
372 
373 
movw(CpuRegister,const Address &)374 void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
375   LOG(FATAL) << "Use movzxw or movsxw instead.";
376 }
377 
378 
movw(const Address & dst,CpuRegister src)379 void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
380   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
381   EmitOperandSizeOverride();
382   EmitOptionalRex32(src, dst);
383   EmitUint8(0x89);
384   EmitOperand(src.LowBits(), dst);
385 }
386 
387 
movw(const Address & dst,const Immediate & imm)388 void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
389   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
390   EmitOperandSizeOverride();
391   EmitOptionalRex32(dst);
392   EmitUint8(0xC7);
393   EmitOperand(Register::RAX, dst);
394   CHECK(imm.is_uint16() || imm.is_int16());
395   EmitUint8(imm.value() & 0xFF);
396   EmitUint8(imm.value() >> 8);
397 }
398 
399 
leaq(CpuRegister dst,const Address & src)400 void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
401   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
402   EmitRex64(dst, src);
403   EmitUint8(0x8D);
404   EmitOperand(dst.LowBits(), src);
405 }
406 
407 
leal(CpuRegister dst,const Address & src)408 void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
409   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
410   EmitOptionalRex32(dst, src);
411   EmitUint8(0x8D);
412   EmitOperand(dst.LowBits(), src);
413 }
414 
415 
movaps(XmmRegister dst,XmmRegister src)416 void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
417   if (CpuHasAVXorAVX2FeatureFlag()) {
418     vmovaps(dst, src);
419     return;
420   }
421   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
422   EmitOptionalRex32(dst, src);
423   EmitUint8(0x0F);
424   EmitUint8(0x28);
425   EmitXmmRegisterOperand(dst.LowBits(), src);
426 }
427 
428 
429 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
vmovaps(XmmRegister dst,XmmRegister src)430 void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
431   DCHECK(CpuHasAVXorAVX2FeatureFlag());
432   uint8_t byte_zero, byte_one, byte_two;
433   bool is_twobyte_form = true;
434   bool load = dst.NeedsRex();
435   bool store = !load;
436 
437   if (src.NeedsRex()&& dst.NeedsRex()) {
438     is_twobyte_form = false;
439   }
440   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
441   // Instruction VEX Prefix
442   byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
443   X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
444   if (is_twobyte_form) {
445     bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
446     byte_one = EmitVexPrefixByteOne(rex_bit,
447                                     vvvv_reg,
448                                     SET_VEX_L_128,
449                                     SET_VEX_PP_NONE);
450   } else {
451     byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
452                                     /*X=*/ false,
453                                     src.NeedsRex(),
454                                     SET_VEX_M_0F);
455     byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
456                                     SET_VEX_L_128,
457                                     SET_VEX_PP_NONE);
458   }
459   EmitUint8(byte_zero);
460   EmitUint8(byte_one);
461   if (!is_twobyte_form) {
462     EmitUint8(byte_two);
463   }
464   // Instruction Opcode
465   if (is_twobyte_form && store) {
466     EmitUint8(0x29);
467   } else {
468     EmitUint8(0x28);
469   }
470   // Instruction Operands
471   if (is_twobyte_form && store) {
472     EmitXmmRegisterOperand(src.LowBits(), dst);
473   } else {
474     EmitXmmRegisterOperand(dst.LowBits(), src);
475   }
476 }
477 
movaps(XmmRegister dst,const Address & src)478 void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
479   if (CpuHasAVXorAVX2FeatureFlag()) {
480     vmovaps(dst, src);
481     return;
482   }
483   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
484   EmitOptionalRex32(dst, src);
485   EmitUint8(0x0F);
486   EmitUint8(0x28);
487   EmitOperand(dst.LowBits(), src);
488 }
489 
490 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
vmovaps(XmmRegister dst,const Address & src)491 void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
492   DCHECK(CpuHasAVXorAVX2FeatureFlag());
493   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
494   uint8_t ByteZero, ByteOne, ByteTwo;
495   bool is_twobyte_form = false;
496   // Instruction VEX Prefix
497   uint8_t rex = src.rex();
498   bool Rex_x = rex & GET_REX_X;
499   bool Rex_b = rex & GET_REX_B;
500   if (!Rex_b && !Rex_x) {
501     is_twobyte_form = true;
502   }
503   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
504   if (is_twobyte_form) {
505     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
506     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
507                                    vvvv_reg,
508                                    SET_VEX_L_128,
509                                    SET_VEX_PP_NONE);
510   } else {
511     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
512                                    Rex_x,
513                                    Rex_b,
514                                    SET_VEX_M_0F);
515     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
516                                    SET_VEX_L_128,
517                                    SET_VEX_PP_NONE);
518   }
519   EmitUint8(ByteZero);
520   EmitUint8(ByteOne);
521   if (!is_twobyte_form) {
522     EmitUint8(ByteTwo);
523   }
524   // Instruction Opcode
525   EmitUint8(0x28);
526   // Instruction Operands
527   EmitOperand(dst.LowBits(), src);
528 }
529 
movups(XmmRegister dst,const Address & src)530 void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
531   if (CpuHasAVXorAVX2FeatureFlag()) {
532     vmovups(dst, src);
533     return;
534   }
535   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
536   EmitOptionalRex32(dst, src);
537   EmitUint8(0x0F);
538   EmitUint8(0x10);
539   EmitOperand(dst.LowBits(), src);
540 }
541 
542 /** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
vmovups(XmmRegister dst,const Address & src)543 void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
544   DCHECK(CpuHasAVXorAVX2FeatureFlag());
545   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
546   uint8_t ByteZero, ByteOne, ByteTwo;
547   bool is_twobyte_form = false;
548   // Instruction VEX Prefix
549   uint8_t rex = src.rex();
550   bool Rex_x = rex & GET_REX_X;
551   bool Rex_b = rex & GET_REX_B;
552   if (!Rex_x && !Rex_b) {
553     is_twobyte_form = true;
554   }
555   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
556   if (is_twobyte_form) {
557     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
558     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
559                                    vvvv_reg,
560                                    SET_VEX_L_128,
561                                    SET_VEX_PP_NONE);
562   } else {
563     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
564                                    Rex_x,
565                                    Rex_b,
566                                    SET_VEX_M_0F);
567     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
568                                    SET_VEX_L_128,
569                                    SET_VEX_PP_NONE);
570   }
571   EmitUint8(ByteZero);
572   EmitUint8(ByteOne);
573   if (!is_twobyte_form) {
574     EmitUint8(ByteTwo);
575   }
576   // Instruction Opcode
577   EmitUint8(0x10);
578   // Instruction Operands
579   EmitOperand(dst.LowBits(), src);
580 }
581 
582 
movaps(const Address & dst,XmmRegister src)583 void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
584   if (CpuHasAVXorAVX2FeatureFlag()) {
585     vmovaps(dst, src);
586     return;
587   }
588   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
589   EmitOptionalRex32(src, dst);
590   EmitUint8(0x0F);
591   EmitUint8(0x29);
592   EmitOperand(src.LowBits(), dst);
593 }
594 
595 /** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
vmovaps(const Address & dst,XmmRegister src)596 void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
597   DCHECK(CpuHasAVXorAVX2FeatureFlag());
598   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
599   uint8_t ByteZero, ByteOne, ByteTwo;
600   bool is_twobyte_form = false;
601 
602   // Instruction VEX Prefix
603   uint8_t rex = dst.rex();
604   bool Rex_x = rex & GET_REX_X;
605   bool Rex_b = rex & GET_REX_B;
606   if (!Rex_b && !Rex_x) {
607     is_twobyte_form = true;
608   }
609   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
610   if (is_twobyte_form) {
611     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
612     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
613                                    vvvv_reg,
614                                    SET_VEX_L_128,
615                                    SET_VEX_PP_NONE);
616   } else {
617     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
618                                    Rex_x,
619                                    Rex_b,
620                                    SET_VEX_M_0F);
621     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
622                                    SET_VEX_L_128,
623                                    SET_VEX_PP_NONE);
624   }
625   EmitUint8(ByteZero);
626   EmitUint8(ByteOne);
627   if (!is_twobyte_form) {
628     EmitUint8(ByteTwo);
629   }
630   // Instruction Opcode
631   EmitUint8(0x29);
632   // Instruction Operands
633   EmitOperand(src.LowBits(), dst);
634 }
635 
movups(const Address & dst,XmmRegister src)636 void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
637   if (CpuHasAVXorAVX2FeatureFlag()) {
638     vmovups(dst, src);
639     return;
640   }
641   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
642   EmitOptionalRex32(src, dst);
643   EmitUint8(0x0F);
644   EmitUint8(0x11);
645   EmitOperand(src.LowBits(), dst);
646 }
647 
648 /** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
vmovups(const Address & dst,XmmRegister src)649 void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
650   DCHECK(CpuHasAVXorAVX2FeatureFlag());
651   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
652   uint8_t ByteZero, ByteOne, ByteTwo;
653   bool is_twobyte_form = false;
654 
655   // Instruction VEX Prefix
656   uint8_t rex = dst.rex();
657   bool Rex_x = rex & GET_REX_X;
658   bool Rex_b = rex & GET_REX_B;
659   if (!Rex_b && !Rex_x) {
660     is_twobyte_form = true;
661   }
662   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
663   if (is_twobyte_form) {
664     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
665     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
666                                    vvvv_reg,
667                                    SET_VEX_L_128,
668                                    SET_VEX_PP_NONE);
669   } else {
670     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
671                                    Rex_x,
672                                    Rex_b,
673                                    SET_VEX_M_0F);
674     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
675                                    SET_VEX_L_128,
676                                    SET_VEX_PP_NONE);
677   }
678   EmitUint8(ByteZero);
679   EmitUint8(ByteOne);
680   if (!is_twobyte_form) {
681     EmitUint8(ByteTwo);
682   }
683   // Instruction Opcode
684   EmitUint8(0x11);
685   // Instruction Operands
686   EmitOperand(src.LowBits(), dst);
687 }
688 
689 
movss(XmmRegister dst,const Address & src)690 void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
691   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
692   EmitUint8(0xF3);
693   EmitOptionalRex32(dst, src);
694   EmitUint8(0x0F);
695   EmitUint8(0x10);
696   EmitOperand(dst.LowBits(), src);
697 }
698 
699 
movss(const Address & dst,XmmRegister src)700 void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
701   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
702   EmitUint8(0xF3);
703   EmitOptionalRex32(src, dst);
704   EmitUint8(0x0F);
705   EmitUint8(0x11);
706   EmitOperand(src.LowBits(), dst);
707 }
708 
709 
movss(XmmRegister dst,XmmRegister src)710 void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
711   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
712   EmitUint8(0xF3);
713   EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
714   EmitUint8(0x0F);
715   EmitUint8(0x11);
716   EmitXmmRegisterOperand(src.LowBits(), dst);
717 }
718 
719 
movsxd(CpuRegister dst,CpuRegister src)720 void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
721   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
722   EmitRex64(dst, src);
723   EmitUint8(0x63);
724   EmitRegisterOperand(dst.LowBits(), src.LowBits());
725 }
726 
727 
movsxd(CpuRegister dst,const Address & src)728 void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
729   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
730   EmitRex64(dst, src);
731   EmitUint8(0x63);
732   EmitOperand(dst.LowBits(), src);
733 }
734 
735 
movd(XmmRegister dst,CpuRegister src)736 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
737   movd(dst, src, true);
738 }
739 
movd(CpuRegister dst,XmmRegister src)740 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
741   movd(dst, src, true);
742 }
743 
movd(XmmRegister dst,CpuRegister src,bool is64bit)744 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
745   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
746   EmitUint8(0x66);
747   EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
748   EmitUint8(0x0F);
749   EmitUint8(0x6E);
750   EmitOperand(dst.LowBits(), Operand(src));
751 }
752 
movd(CpuRegister dst,XmmRegister src,bool is64bit)753 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
754   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
755   EmitUint8(0x66);
756   EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
757   EmitUint8(0x0F);
758   EmitUint8(0x7E);
759   EmitOperand(src.LowBits(), Operand(dst));
760 }
761 
addss(XmmRegister dst,XmmRegister src)762 void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
763   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
764   EmitUint8(0xF3);
765   EmitOptionalRex32(dst, src);
766   EmitUint8(0x0F);
767   EmitUint8(0x58);
768   EmitXmmRegisterOperand(dst.LowBits(), src);
769 }
770 
addss(XmmRegister dst,const Address & src)771 void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
772   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
773   EmitUint8(0xF3);
774   EmitOptionalRex32(dst, src);
775   EmitUint8(0x0F);
776   EmitUint8(0x58);
777   EmitOperand(dst.LowBits(), src);
778 }
779 
780 
subss(XmmRegister dst,XmmRegister src)781 void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
782   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
783   EmitUint8(0xF3);
784   EmitOptionalRex32(dst, src);
785   EmitUint8(0x0F);
786   EmitUint8(0x5C);
787   EmitXmmRegisterOperand(dst.LowBits(), src);
788 }
789 
790 
subss(XmmRegister dst,const Address & src)791 void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
792   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
793   EmitUint8(0xF3);
794   EmitOptionalRex32(dst, src);
795   EmitUint8(0x0F);
796   EmitUint8(0x5C);
797   EmitOperand(dst.LowBits(), src);
798 }
799 
800 
mulss(XmmRegister dst,XmmRegister src)801 void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
802   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
803   EmitUint8(0xF3);
804   EmitOptionalRex32(dst, src);
805   EmitUint8(0x0F);
806   EmitUint8(0x59);
807   EmitXmmRegisterOperand(dst.LowBits(), src);
808 }
809 
810 
mulss(XmmRegister dst,const Address & src)811 void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
812   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
813   EmitUint8(0xF3);
814   EmitOptionalRex32(dst, src);
815   EmitUint8(0x0F);
816   EmitUint8(0x59);
817   EmitOperand(dst.LowBits(), src);
818 }
819 
820 
divss(XmmRegister dst,XmmRegister src)821 void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
822   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
823   EmitUint8(0xF3);
824   EmitOptionalRex32(dst, src);
825   EmitUint8(0x0F);
826   EmitUint8(0x5E);
827   EmitXmmRegisterOperand(dst.LowBits(), src);
828 }
829 
830 
divss(XmmRegister dst,const Address & src)831 void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
832   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
833   EmitUint8(0xF3);
834   EmitOptionalRex32(dst, src);
835   EmitUint8(0x0F);
836   EmitUint8(0x5E);
837   EmitOperand(dst.LowBits(), src);
838 }
839 
840 
addps(XmmRegister dst,XmmRegister src)841 void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
842   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
843   EmitOptionalRex32(dst, src);
844   EmitUint8(0x0F);
845   EmitUint8(0x58);
846   EmitXmmRegisterOperand(dst.LowBits(), src);
847 }
848 
849 
subps(XmmRegister dst,XmmRegister src)850 void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
851   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
852   EmitOptionalRex32(dst, src);
853   EmitUint8(0x0F);
854   EmitUint8(0x5C);
855   EmitXmmRegisterOperand(dst.LowBits(), src);
856 }
857 
// VEX.128.0F.WIG 58 /r VADDPS: dst = add_left + add_right (packed singles).
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The compact 2-byte VEX prefix (C5) cannot carry the B extension bit, so it
  // is only usable when the r/m register (add_right) needs no REX extension.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  // add_left is the "vvvv" (second source) operand of the VEX encoding.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
886 
// VEX.128.0F.WIG 5C /r VSUBPS: dst = src1 - src2 (packed singles).
void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
  // 2-byte VEX form is possible only when src2 (the r/m operand) needs no REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 occupies the VEX "vvvv" field.
  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
911 
912 
// MULPS xmm, xmm (0F 59 /r): packed single-precision multiply.
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
920 
// VEX.128.0F.WIG 59 /r VMULPS: dst = src1 * src2 (packed singles).
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX form only when src2 (r/m) needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 goes into the VEX "vvvv" field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
949 
// DIVPS xmm, xmm (0F 5E /r): packed single-precision divide.
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
957 
// VEX.128.0F.WIG 5E /r VDIVPS: dst = src1 / src2 (packed singles).
void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX form only when src2 (r/m) needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 goes into the VEX "vvvv" field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
986 
// VEX.128.66.0F38.W0 A9 /r VFMADD213SS: acc = left * acc + right (scalar single).
// FMA encodings always require the 3-byte VEX prefix (0F38 escape map).
void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  // left occupies the VEX "vvvv" field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
1005 
// VEX.128.66.0F38.W1 A9 /r VFMADD213SD: acc = left * acc + right (scalar double).
// W=1 selects the double-precision form; 3-byte VEX prefix is mandatory.
void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  // left occupies the VEX "vvvv" field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
// FLD m32fp (D9 /0): push a single-precision value from memory onto the x87 stack.
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);  // /0: opcode extension in the ModRM reg field.
}
1029 
1030 
// FST m32fp (D9 /2): store ST(0) to memory as single precision (no pop).
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);  // /2 opcode extension.
}
1036 
1037 
// FSTP m32fp (D9 /3): store ST(0) to memory as single precision and pop.
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);  // /3 opcode extension.
}
1043 
1044 
// MOVAPD xmm, xmm (66 0F 28 /r): move aligned packed doubles, register form.
// Prefers the VEX encoding when AVX is available.
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1057 
1058 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
// VEX.128.66.0F.WIG 28/29 /r VMOVAPD xmm, xmm.
// When both registers need REX, the 3-byte VEX form is required; in the 2-byte
// form, if only src needs REX the operands are swapped and the store opcode
// (0x29) is used so the extended register can sit in the ModRM reg field.
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // "load" direction (opcode 0x28) is chosen when dst is the extended register.
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);  // Store form: reg field holds src.
  } else {
    EmitUint8(0x28);  // Load form: reg field holds dst.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1105 
// MOVAPD xmm, m128 (66 0F 28 /r): load aligned packed doubles.
// Prefers the VEX encoding when AVX is available.
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
1118 
1119 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
// VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128: aligned load.
// The 2-byte VEX form is only usable when the address needs neither the
// REX.X (index) nor REX.B (base) extension bit.
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1159 
// MOVUPD xmm, m128 (66 0F 10 /r): load unaligned packed doubles.
// Prefers the VEX encoding when AVX is available.
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1172 
1173 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
vmovupd(XmmRegister dst,const Address & src)1174 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1175   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1176   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1177   bool is_twobyte_form = false;
1178   uint8_t ByteZero, ByteOne, ByteTwo;
1179 
1180   // Instruction VEX Prefix
1181   uint8_t rex = src.rex();
1182   bool Rex_x = rex & GET_REX_X;
1183   bool Rex_b = rex & GET_REX_B;
1184   if (!Rex_b && !Rex_x) {
1185     is_twobyte_form = true;
1186   }
1187   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1188   if (is_twobyte_form) {
1189     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1190     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1191                                    vvvv_reg,
1192                                    SET_VEX_L_128,
1193                                    SET_VEX_PP_66);
1194   } else {
1195     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1196                                    Rex_x,
1197                                    Rex_b,
1198                                    SET_VEX_M_0F);
1199     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1200                                    SET_VEX_L_128,
1201                                    SET_VEX_PP_66);
1202   }
1203   EmitUint8(ByteZero);
1204   EmitUint8(ByteOne);
1205   if (!is_twobyte_form)
1206   EmitUint8(ByteTwo);
1207   // Instruction Opcode
1208   EmitUint8(0x10);
1209   // Instruction Operands
1210   EmitOperand(dst.LowBits(), src);
1211 }
1212 
// MOVAPD m128, xmm (66 0F 29 /r): store aligned packed doubles.
// Prefers the VEX encoding when AVX is available.
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
1225 
1226 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
// VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1: aligned store.
// 2-byte VEX form only when the address needs neither REX.X nor REX.B.
void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1265 
// MOVUPD m128, xmm (66 0F 11 /r): store unaligned packed doubles.
// Prefers the VEX encoding when AVX is available.
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1278 
1279 /** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
// VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1: unaligned store.
// 2-byte VEX form only when the address needs neither REX.X nor REX.B.
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1319 
1320 
// MOVSD xmm, m64 (F2 0F 10 /r): load scalar double from memory.
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar-double (sd) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1329 
1330 
// MOVSD m64, xmm (F2 0F 11 /r): store scalar double to memory.
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1339 
1340 
// MOVSD xmm, xmm (F2 0F 11 /r): register-to-register scalar double move.
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
1349 
1350 
// ADDSD xmm, xmm (F2 0F 58 /r): scalar double-precision add.
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1359 
1360 
// ADDSD xmm, m64 (F2 0F 58 /r): scalar double-precision add, memory source.
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1369 
1370 
// SUBSD xmm, xmm (F2 0F 5C /r): scalar double-precision subtract.
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1379 
1380 
// SUBSD xmm, m64 (F2 0F 5C /r): scalar double-precision subtract, memory source.
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1389 
1390 
// MULSD xmm, xmm (F2 0F 59 /r): scalar double-precision multiply.
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1399 
1400 
// MULSD xmm, m64 (F2 0F 59 /r): scalar double-precision multiply, memory source.
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1409 
1410 
// DIVSD xmm, xmm (F2 0F 5E /r): scalar double-precision divide.
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1419 
1420 
// DIVSD xmm, m64 (F2 0F 5E /r): scalar double-precision divide, memory source.
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1429 
1430 
// ADDPD xmm, xmm (66 0F 58 /r): packed double-precision add.
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed-double (pd) form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1439 
1440 
vaddpd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1441 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1442   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1443   bool is_twobyte_form = false;
1444   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1445   if (!add_right.NeedsRex()) {
1446     is_twobyte_form = true;
1447   }
1448   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1449   X86_64ManagedRegister vvvv_reg =
1450       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1451   if (is_twobyte_form) {
1452     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1453   } else {
1454     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1455                                    /*X=*/ false,
1456                                    add_right.NeedsRex(),
1457                                    SET_VEX_M_0F);
1458     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1459   }
1460   EmitUint8(ByteZero);
1461   EmitUint8(ByteOne);
1462   if (!is_twobyte_form) {
1463     EmitUint8(ByteTwo);
1464   }
1465   EmitUint8(0x58);
1466   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1467 }
1468 
1469 
// SUBPD xmm, xmm (66 0F 5C /r): packed double-precision subtract.
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1478 
1479 
vsubpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1480 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1481   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1482   bool is_twobyte_form = false;
1483   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1484   if (!src2.NeedsRex()) {
1485     is_twobyte_form = true;
1486   }
1487   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1488   X86_64ManagedRegister vvvv_reg =
1489       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1490   if (is_twobyte_form) {
1491     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1492   } else {
1493     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1494                                    /*X=*/ false,
1495                                    src2.NeedsRex(),
1496                                    SET_VEX_M_0F);
1497     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1498   }
1499   EmitUint8(ByteZero);
1500   EmitUint8(ByteOne);
1501   if (!is_twobyte_form) {
1502     EmitUint8(ByteTwo);
1503   }
1504   EmitUint8(0x5C);
1505   EmitXmmRegisterOperand(dst.LowBits(), src2);
1506 }
1507 
1508 
// MULPD xmm, xmm (66 0F 59 /r): packed double-precision multiply.
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1517 
// VEX.128.66.0F.WIG 59 /r VMULPD: dst = src1 * src2 (packed doubles).
void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX form only when src2 (r/m) needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 occupies the VEX "vvvv" field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1546 
// DIVPD xmm, xmm (66 0F 5E /r): packed double-precision divide.
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1555 
1556 
// VEX.128.66.0F.WIG 5E /r VDIVPD: dst = src1 / src2 (packed doubles).
void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX form only when src2 (r/m) needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 occupies the VEX "vvvv" field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1585 
1586 
// MOVDQA xmm, xmm (66 0F 6F /r): move aligned double quadword, register form.
// Prefers the VEX encoding when AVX is available.
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1599 
1600 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
// VEX.128.66.0F.WIG 6F/7F /r VMOVDQA xmm, xmm.
// When both registers need REX the 3-byte VEX form is required; in the 2-byte
// form, if only src needs REX the operands are swapped and the store opcode
// (0x7F) is used so the extended register can sit in the ModRM reg field.
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Instruction VEX Prefix
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // "load" direction (opcode 0x6F) is chosen when dst is the extended register.
  bool load = dst.NeedsRex();
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x7F);  // Store form: reg field holds src.
  } else {
    EmitUint8(0x6F);  // Load form: reg field holds dst.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1647 
// MOVDQA xmm, m128 (66 0F 6F /r): load aligned double quadword.
// Prefers the VEX encoding when AVX is available.
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1660 
1661 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
// VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128: aligned load.
// 2-byte VEX form only when the address needs neither REX.X nor REX.B.
void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t  ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1701 
// MOVDQU xmm, m128 (F3 0F 6F /r): load 16 bytes, no alignment requirement.
// Dispatches to the VEX encoding when AVX/AVX2 is available.
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the unaligned form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
1714 
1715 /** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
1716 Load Unaligned */
vmovdqu(XmmRegister dst,const Address & src)1717 void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
1718   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1719   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1720   uint8_t ByteZero, ByteOne, ByteTwo;
1721   bool is_twobyte_form = false;
1722 
1723   // Instruction VEX Prefix
1724   uint8_t rex = src.rex();
1725   bool Rex_x = rex & GET_REX_X;
1726   bool Rex_b = rex & GET_REX_B;
1727   if (!Rex_x && !Rex_b) {
1728     is_twobyte_form = true;
1729   }
1730   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1731   if (is_twobyte_form) {
1732     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1733     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1734                                    vvvv_reg,
1735                                    SET_VEX_L_128,
1736                                    SET_VEX_PP_F3);
1737   } else {
1738     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1739                                    Rex_x,
1740                                    Rex_b,
1741                                    SET_VEX_M_0F);
1742     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1743                                    SET_VEX_L_128,
1744                                    SET_VEX_PP_F3);
1745   }
1746   EmitUint8(ByteZero);
1747   EmitUint8(ByteOne);
1748   if (!is_twobyte_form) {
1749     EmitUint8(ByteTwo);
1750   }
1751   // Instruction Opcode
1752   EmitUint8(0x6F);
1753   // Instruction Operands
1754   EmitOperand(dst.LowBits(), src);
1755 }
1756 
// MOVDQA m128, xmm (66 0F 7F /r): store 16 bytes to aligned memory.
// Dispatches to the VEX encoding when AVX/AVX2 is available.
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory operand-size prefix selecting this form.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Store-direction opcode.
  EmitOperand(src.LowBits(), dst);
}
1769 
1770 /** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
vmovdqa(const Address & dst,XmmRegister src)1771 void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
1772   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1773   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1774   bool is_twobyte_form = false;
1775   uint8_t ByteZero, ByteOne, ByteTwo;
1776   // Instruction VEX Prefix
1777   uint8_t rex = dst.rex();
1778   bool Rex_x = rex & GET_REX_X;
1779   bool Rex_b = rex & GET_REX_B;
1780   if (!Rex_x && !Rex_b) {
1781     is_twobyte_form = true;
1782   }
1783   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1784   if (is_twobyte_form) {
1785     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1786     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1787                                    vvvv_reg,
1788                                    SET_VEX_L_128,
1789                                    SET_VEX_PP_66);
1790   } else {
1791     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1792                                    Rex_x,
1793                                    Rex_b,
1794                                    SET_VEX_M_0F);
1795     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1796                                    SET_VEX_L_128,
1797                                    SET_VEX_PP_66);
1798   }
1799   EmitUint8(ByteZero);
1800   EmitUint8(ByteOne);
1801   if (!is_twobyte_form) {
1802     EmitUint8(ByteTwo);
1803   }
1804   // Instruction Opcode
1805   EmitUint8(0x7F);
1806   // Instruction Operands
1807   EmitOperand(src.LowBits(), dst);
1808 }
1809 
// MOVDQU m128, xmm (F3 0F 7F /r): store 16 bytes, no alignment requirement.
// Dispatches to the VEX encoding when AVX/AVX2 is available.
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the unaligned form.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Store-direction opcode.
  EmitOperand(src.LowBits(), dst);
}
1822 
1823 /** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
vmovdqu(const Address & dst,XmmRegister src)1824 void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
1825   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1826   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1827   uint8_t ByteZero, ByteOne, ByteTwo;
1828   bool is_twobyte_form = false;
1829 
1830   // Instruction VEX Prefix
1831   uint8_t rex = dst.rex();
1832   bool Rex_x = rex & GET_REX_X;
1833   bool Rex_b = rex & GET_REX_B;
1834   if (!Rex_b && !Rex_x) {
1835     is_twobyte_form = true;
1836   }
1837   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1838   if (is_twobyte_form) {
1839     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1840     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1841                                    vvvv_reg,
1842                                    SET_VEX_L_128,
1843                                    SET_VEX_PP_F3);
1844   } else {
1845     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1846                                    Rex_x,
1847                                    Rex_b,
1848                                    SET_VEX_M_0F);
1849     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1850                                    SET_VEX_L_128,
1851                                    SET_VEX_PP_F3);
1852   }
1853   EmitUint8(ByteZero);
1854   EmitUint8(ByteOne);
1855   if (!is_twobyte_form) {
1856     EmitUint8(ByteTwo);
1857   }
1858   // Instruction Opcode
1859   EmitUint8(0x7F);
1860   // Instruction Operands
1861   EmitOperand(src.LowBits(), dst);
1862 }
1863 
// PADDB xmm1, xmm2 (SSE2, 66 0F FC /r): packed add of byte lanes.
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1872 
1873 
vpaddb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1874 void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1875   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1876   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1877   uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
1878   bool is_twobyte_form = true;
1879   if (add_right.NeedsRex()) {
1880     is_twobyte_form = false;
1881   }
1882   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1883   X86_64ManagedRegister vvvv_reg =
1884       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1885   if (is_twobyte_form) {
1886     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1887   } else {
1888     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1889                                    /*X=*/ false,
1890                                    add_right.NeedsRex(),
1891                                    SET_VEX_M_0F);
1892     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1893   }
1894   EmitUint8(ByteZero);
1895   EmitUint8(ByteOne);
1896   if (!is_twobyte_form) {
1897     EmitUint8(ByteTwo);
1898   }
1899   EmitUint8(0xFC);
1900   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1901 }
1902 
1903 
// PSUBB xmm1, xmm2 (SSE2, 66 0F F8 /r): packed subtract of byte lanes.
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1912 
1913 
vpsubb(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1914 void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1915   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1916   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1917   bool is_twobyte_form = false;
1918   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1919   if (!add_right.NeedsRex()) {
1920     is_twobyte_form = true;
1921   }
1922   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1923   X86_64ManagedRegister vvvv_reg =
1924       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1925   if (is_twobyte_form) {
1926     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1927   } else {
1928     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1929                                    /*X=*/ false,
1930                                    add_right.NeedsRex(),
1931                                    SET_VEX_M_0F);
1932     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1933   }
1934   EmitUint8(ByteZero);
1935   EmitUint8(ByteOne);
1936   if (!is_twobyte_form) {
1937     EmitUint8(ByteTwo);
1938   }
1939   EmitUint8(0xF8);
1940   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1941 }
1942 
1943 
// PADDW xmm1, xmm2 (SSE2, 66 0F FD /r): packed add of 16-bit lanes.
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1952 
vpaddw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1953 void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1954   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1955   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1956   bool is_twobyte_form = false;
1957   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1958   if (!add_right.NeedsRex()) {
1959     is_twobyte_form = true;
1960   }
1961   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1962   X86_64ManagedRegister vvvv_reg =
1963       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1964   if (is_twobyte_form) {
1965     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1966   } else {
1967     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1968                                    /*X=*/ false,
1969                                    add_right.NeedsRex(),
1970                                    SET_VEX_M_0F);
1971     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1972   }
1973   EmitUint8(ByteZero);
1974   EmitUint8(ByteOne);
1975   if (!is_twobyte_form) {
1976     EmitUint8(ByteTwo);
1977   }
1978   EmitUint8(0xFD);
1979   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1980 }
1981 
1982 
// PSUBW xmm1, xmm2 (SSE2, 66 0F F9 /r): packed subtract of 16-bit lanes.
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1991 
vpsubw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1992 void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1993   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1994   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1995   bool is_twobyte_form = false;
1996   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1997   if (!add_right.NeedsRex()) {
1998     is_twobyte_form = true;
1999   }
2000   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2001   X86_64ManagedRegister vvvv_reg =
2002       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2003   if (is_twobyte_form) {
2004     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2005   } else {
2006     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2007                                    /*X=*/ false,
2008                                    add_right.NeedsRex(),
2009                                    SET_VEX_M_0F);
2010     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2011   }
2012   EmitUint8(ByteZero);
2013   EmitUint8(ByteOne);
2014   if (!is_twobyte_form) {
2015     EmitUint8(ByteTwo);
2016   }
2017   EmitUint8(0xF9);
2018   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2019 }
2020 
2021 
// PMULLW xmm1, xmm2 (SSE2, 66 0F D5 /r): packed 16-bit multiply, low halves.
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2030 
vpmullw(XmmRegister dst,XmmRegister src1,XmmRegister src2)2031 void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2032   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2033   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2034   bool is_twobyte_form = false;
2035   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2036   if (!src2.NeedsRex()) {
2037     is_twobyte_form = true;
2038   }
2039   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2040   X86_64ManagedRegister vvvv_reg =
2041       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2042   if (is_twobyte_form) {
2043     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2044   } else {
2045     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2046                                    /*X=*/ false,
2047                                    src2.NeedsRex(),
2048                                    SET_VEX_M_0F);
2049     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2050   }
2051   EmitUint8(ByteZero);
2052   EmitUint8(ByteOne);
2053   if (!is_twobyte_form) {
2054     EmitUint8(ByteTwo);
2055   }
2056   EmitUint8(0xD5);
2057   EmitXmmRegisterOperand(dst.LowBits(), src2);
2058 }
2059 
// PADDD xmm1, xmm2 (SSE2, 66 0F FE /r): packed add of 32-bit lanes.
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2068 
vpaddd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2069 void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2070   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2071   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2072   bool is_twobyte_form = false;
2073   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2074   if (!add_right.NeedsRex()) {
2075     is_twobyte_form = true;
2076   }
2077   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2078   X86_64ManagedRegister vvvv_reg =
2079       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2080   if (is_twobyte_form) {
2081     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2082   } else {
2083     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2084                                    /*X=*/ false,
2085                                    add_right.NeedsRex(),
2086                                    SET_VEX_M_0F);
2087     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2088   }
2089   EmitUint8(ByteZero);
2090   EmitUint8(ByteOne);
2091   if (!is_twobyte_form) {
2092     EmitUint8(ByteTwo);
2093   }
2094   EmitUint8(0xFE);
2095   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2096 }
2097 
// PSUBD xmm1, xmm2 (SSE2, 66 0F FA /r): packed subtract of 32-bit lanes.
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2106 
2107 
// PMULLD xmm1, xmm2 (SSE4.1, 66 0F 38 40 /r): packed 32-bit multiply, low result.
// Note the three-byte 0F 38 opcode map, unlike the other SSE2 emitters here.
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);  // Opcode-map escape.
  EmitUint8(0x40);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2117 
vpmulld(XmmRegister dst,XmmRegister src1,XmmRegister src2)2118 void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2119   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2120   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2121   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2122   ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
2123   X86_64ManagedRegister vvvv_reg =
2124       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2125   ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2126                                    /*X=*/ false,
2127                                    src2.NeedsRex(),
2128                                    SET_VEX_M_0F_38);
2129   ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2130   EmitUint8(ByteZero);
2131   EmitUint8(ByteOne);
2132   EmitUint8(ByteTwo);
2133   EmitUint8(0x40);
2134   EmitXmmRegisterOperand(dst.LowBits(), src2);
2135 }
2136 
// PADDQ xmm1, xmm2 (SSE2, 66 0F D4 /r): packed add of 64-bit lanes.
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2145 
2146 
vpaddq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2147 void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2148   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2149   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2150   bool is_twobyte_form = false;
2151   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2152   if (!add_right.NeedsRex()) {
2153     is_twobyte_form = true;
2154   }
2155   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2156   X86_64ManagedRegister vvvv_reg =
2157       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2158   if (is_twobyte_form) {
2159     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2160   } else {
2161     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2162                                    /*X=*/ false,
2163                                    add_right.NeedsRex(),
2164                                    SET_VEX_M_0F);
2165     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2166   }
2167   EmitUint8(ByteZero);
2168   EmitUint8(ByteOne);
2169   if (!is_twobyte_form) {
2170     EmitUint8(ByteTwo);
2171   }
2172   EmitUint8(0xD4);
2173   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2174 }
2175 
2176 
// PSUBQ xmm1, xmm2 (SSE2, 66 0F FB /r): packed subtract of 64-bit lanes.
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2185 
vpsubq(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2186 void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2187   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2188   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2189   bool is_twobyte_form = false;
2190   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2191   if (!add_right.NeedsRex()) {
2192     is_twobyte_form = true;
2193   }
2194   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2195   X86_64ManagedRegister vvvv_reg =
2196       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2197   if (is_twobyte_form) {
2198     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2199   } else {
2200     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2201                                    /*X=*/ false,
2202                                    add_right.NeedsRex(),
2203                                    SET_VEX_M_0F);
2204     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2205   }
2206   EmitUint8(ByteZero);
2207   EmitUint8(ByteOne);
2208   if (!is_twobyte_form) {
2209     EmitUint8(ByteTwo);
2210   }
2211   EmitUint8(0xFB);
2212   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2213 }
2214 
2215 
// PADDUSB xmm1, xmm2 (SSE2, 66 0F DC /r): packed add of bytes, unsigned saturation.
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2224 
2225 
// PADDSB xmm1, xmm2 (SSE2, 66 0F EC /r): packed add of bytes, signed saturation.
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2234 
2235 
// PADDUSW xmm1, xmm2 (SSE2, 66 0F DD /r): packed add of words, unsigned saturation.
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2244 
2245 
// PADDSW xmm1, xmm2 (SSE2, 66 0F ED /r): packed add of words, signed saturation.
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2254 
2255 
// PSUBUSB xmm1, xmm2 (SSE2, 66 0F D8 /r): packed subtract of bytes, unsigned saturation.
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2264 
2265 
// PSUBSB xmm1, xmm2 (SSE2, 66 0F E8 /r): packed subtract of bytes, signed saturation.
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2274 
2275 
vpsubd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)2276 void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2277   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2278   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2279   bool is_twobyte_form = false;
2280   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2281   if (!add_right.NeedsRex()) {
2282     is_twobyte_form = true;
2283   }
2284   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2285   X86_64ManagedRegister vvvv_reg =
2286       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2287   if (is_twobyte_form) {
2288     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2289   } else {
2290     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2291                                    /*X=*/ false,
2292                                    add_right.NeedsRex(),
2293                                    SET_VEX_M_0F);
2294     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2295   }
2296   EmitUint8(ByteZero);
2297   EmitUint8(ByteOne);
2298   if (!is_twobyte_form) {
2299     EmitUint8(ByteTwo);
2300   }
2301   EmitUint8(0xFA);
2302   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2303 }
2304 
2305 
// PSUBUSW xmm1, xmm2 (SSE2, 66 0F D9 /r): packed subtract of words, unsigned saturation.
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2314 
2315 
// PSUBSW xmm1, xmm2 (SSE2, 66 0F E9 /r): packed subtract of words, signed saturation.
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2324 
2325 
// CVTSI2SS from a 32-bit general-purpose register (is64bit defaults to false).
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, /*is64bit=*/ false);
}
2329 
2330 
// CVTSI2SS xmm, r32/r64 (F3 [REX.W] 0F 2A /r): convert a signed integer
// register to single-precision float.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode.
  EmitOperand(dst.LowBits(), Operand(src));
}
2344 
2345 
// CVTSI2SS xmm, m32/m64 (F3 [REX.W] 0F 2A /r): memory-source form.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2359 
2360 
// CVTSI2SD from a 32-bit general-purpose register (is64bit defaults to false).
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, /*is64bit=*/ false);
}
2364 
2365 
// CVTSI2SD xmm, r32/r64 (F2 [REX.W] 0F 2A /r): convert a signed integer
// register to double-precision float.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode.
  EmitOperand(dst.LowBits(), Operand(src));
}
2379 
2380 
// CVTSI2SD xmm, m32/m64 (F2 [REX.W] 0F 2A /r): memory-source form.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2394 
2395 
// CVTSS2SI r32, xmm (F3 0F 2D /r): convert single-precision float to a
// 32-bit signed integer, rounding per MXCSR.
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2404 
2405 
// CVTSS2SD xmm1, xmm2 (F3 0F 5A /r): widen single- to double-precision.
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2414 
2415 
// CVTSS2SD xmm, m32 (F3 0F 5A /r): memory-source form.
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2424 
2425 
// CVTSD2SI r32, xmm (F2 0F 2D /r): convert double-precision float to a
// 32-bit signed integer, rounding per MXCSR.
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2434 
2435 
// CVTTSS2SI with a 32-bit destination (is64bit defaults to false).
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, /*is64bit=*/ false);
}
2439 
2440 
// CVTTSS2SI r32/r64, xmm (F3 [REX.W] 0F 2C /r): truncating conversion of a
// single-precision float to a signed integer.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);  // Opcode (truncating variant of 0x2D).
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2454 
2455 
// CVTTSD2SI with a 32-bit destination (is64bit defaults to false).
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, /*is64bit=*/ false);
}
2459 
2460 
// CVTTSD2SI r32/r64, xmm (F2 [REX.W] 0F 2C /r): truncating conversion of a
// double-precision float to a signed integer.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);  // Opcode (truncating variant of 0x2D).
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2474 
2475 
// CVTSD2SS xmm1, xmm2 (F2 0F 5A /r): narrow double- to single-precision.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2484 
2485 
// CVTSD2SS xmm, m64 (F2 0F 5A /r): memory-source form.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode.
  EmitOperand(dst.LowBits(), src);
}
2494 
2495 
// CVTDQ2PS xmm1, xmm2 (0F 5B /r, no mandatory prefix): packed 32-bit ints to
// single-precision floats.
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2503 
2504 
// CVTDQ2PD xmm1, xmm2 (F3 0F E6 /r): low two packed 32-bit ints to
// double-precision floats.
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);  // Opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2513 
2514 
// comiss: ordered compare of the low single-precision floats in `a` and `b`,
// setting EFLAGS. Encoding: [REX] 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2522 
2523 
// comiss (memory operand): ordered compare of the low float in `a` with the
// float at `b`, setting EFLAGS. Encoding: [REX] 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2531 
2532 
// comisd: ordered compare of the low doubles in `a` and `b`, setting EFLAGS.
// Encoding: 66 [REX] 0F 2F /r (the 66 prefix selects the double variant).
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2541 
2542 
// comisd (memory operand): ordered compare of the low double in `a` with the
// double at `b`, setting EFLAGS. Encoding: 66 [REX] 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2551 
2552 
// ucomiss: unordered compare of the low single-precision floats in `a` and
// `b`, setting EFLAGS. Encoding: [REX] 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2560 
2561 
// ucomiss (memory operand): unordered compare of the low float in `a` with
// the float at `b`, setting EFLAGS. Encoding: [REX] 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2569 
2570 
// ucomisd: unordered compare of the low doubles in `a` and `b`, setting
// EFLAGS. Encoding: 66 [REX] 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2579 
2580 
// ucomisd (memory operand): unordered compare of the low double in `a` with
// the double at `b`, setting EFLAGS. Encoding: 66 [REX] 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2589 
2590 
// roundsd (SSE4.1): round the low double in `src` into `dst` using the
// rounding mode in `imm`. Encoding: 66 [REX] 0F 3A 0B /r ib.
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // rounding-control immediate follows ModRM
}
2601 
2602 
// roundss (SSE4.1): round the low single in `src` into `dst` using the
// rounding mode in `imm`. Encoding: 66 [REX] 0F 3A 0A /r ib.
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // rounding-control immediate follows ModRM
}
2613 
2614 
// sqrtsd: square root of the low double in `src`, stored into `dst`.
// Encoding: F2 [REX] 0F 51 /r.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2623 
2624 
// sqrtss: square root of the low single in `src`, stored into `dst`.
// Encoding: F3 [REX] 0F 51 /r.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2633 
2634 
// xorpd (memory operand): bitwise XOR of `dst` with the 128 bits at `src`.
// Encoding: 66 [REX] 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}
2643 
2644 
// xorpd: bitwise XOR of the 128-bit registers `dst` and `src`.
// Encoding: 66 [REX] 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2653 
2654 
// xorps (memory operand): bitwise XOR of `dst` with the 128 bits at `src`.
// Encoding (no prefix): [REX] 0F 57 /r.
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}
2662 
2663 
// xorps: bitwise XOR of the 128-bit registers `dst` and `src`.
// Encoding (no prefix): [REX] 0F 57 /r.
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2671 
// pxor: integer bitwise XOR of the 128-bit registers `dst` and `src`.
// Encoding: 66 [REX] 0F EF /r.
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2680 
/* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
// AVX three-operand XOR: dst = src1 ^ src2 (128-bit).
void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The compact two-byte VEX prefix (C5) has no B bit, so it is usable only
  // when src2 (the ModRM r/m register) needs no REX extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0xEF);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2710 
/* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
// AVX three-operand XOR (float form, pp=NONE): dst = src1 ^ src2.
void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0x57);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2740 
/* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
// AVX three-operand XOR (double form, pp=66): dst = src1 ^ src2.
void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0x57);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2770 
// andpd (memory operand): bitwise AND of `dst` with the 128 bits at `src`.
// Encoding: 66 [REX] 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitOperand(dst.LowBits(), src);
}
2779 
// andpd: bitwise AND of the 128-bit registers `dst` and `src`.
// Encoding: 66 [REX] 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2788 
// andps: bitwise AND of the 128-bit registers `dst` and `src`.
// Encoding (no prefix): [REX] 0F 54 /r.
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2796 
// pand: integer bitwise AND of the 128-bit registers `dst` and `src`.
// Encoding: 66 [REX] 0F DB /r.
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2805 
/* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
// AVX three-operand AND: dst = src1 & src2 (128-bit).
void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0xDB);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2835 
/* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
// AVX three-operand AND (float form, pp=NONE): dst = src1 & src2.
void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0x54);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2865 
/* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
// AVX three-operand AND (double form, pp=66): dst = src1 & src2.
void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0x54);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2895 
// andn (BMI1): dst = ~src1 & src2 on 64-bit GPRs (VEX.W=1).
// Always uses the three-byte VEX prefix with map 0F 38, opcode F2 /r.
void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                          /*X=*/ false,
                                          src2.NeedsRex(),
                                          SET_VEX_M_0F_38);
  // src1 (the inverted operand) is encoded in the VEX.vvvv field.
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  // Opcode field
  EmitUint8(0xF2);
  EmitRegisterOperand(dst.LowBits(), src2.LowBits());
}
2914 
// andnpd: dst = ~dst & src (bitwise AND NOT, 128-bit).
// Encoding: 66 [REX] 0F 55 /r.
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2923 
// andnps: dst = ~dst & src (bitwise AND NOT, 128-bit).
// Encoding (no prefix): [REX] 0F 55 /r.
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2931 
// pandn: dst = ~dst & src (integer bitwise AND NOT, 128-bit).
// Encoding: 66 [REX] 0F DF /r.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2940 
/* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
// AVX three-operand AND NOT: dst = ~src1 & src2 (128-bit).
void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0xDF);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2970 
/* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
// AVX three-operand AND NOT (float form, pp=NONE): dst = ~src1 & src2.
void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0x55);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3000 
/* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
// AVX three-operand AND NOT (double form, pp=66): dst = ~src1 & src2.
void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0x55);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3030 
// orpd: bitwise OR of the 128-bit registers `dst` and `src`.
// Encoding: 66 [REX] 0F 56 /r.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3039 
// orps: bitwise OR of the 128-bit registers `dst` and `src`.
// Encoding (no prefix): [REX] 0F 56 /r.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3047 
// por: integer bitwise OR of the 128-bit registers `dst` and `src`.
// Encoding: 66 [REX] 0F EB /r.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3056 
/* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
// AVX three-operand OR: dst = src1 | src2 (128-bit).
void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0xEB);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3086 
/* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
// AVX three-operand OR (float form, pp=NONE): dst = src1 | src2.
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0x56);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3116 
/* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
// AVX three-operand OR (double form, pp=66): dst = src1 | src2.
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0x56);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3146 
// pavgb: rounded average of packed unsigned bytes.
// Encoding: 66 [REX] 0F E0 /r.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3155 
// pavgw: rounded average of packed unsigned words.
// Encoding: 66 [REX] 0F E3 /r.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3164 
// psadbw: sums of absolute differences of packed unsigned bytes.
// Encoding: 66 [REX] 0F F6 /r.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3173 
// pmaddwd: multiply packed signed words and add adjacent products into
// packed dwords. Encoding: 66 [REX] 0F F5 /r.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3182 
// vpmaddwd (VEX.128.66.0F.WIG F5 /r): AVX three-operand form of pmaddwd;
// dst = horizontal add of adjacent products of src1 and src2 words.
void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) is only possible when src2 needs no REX.B extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 goes in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);  // third VEX byte only in the C4 form
  }
  EmitUint8(0xF5);  // opcode
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3211 
// phaddw (SSSE3): horizontal add of packed words.
// Encoding: 66 [REX] 0F 38 01 /r.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3221 
// phaddd (SSSE3): horizontal add of packed dwords.
// Encoding: 66 [REX] 0F 38 02 /r.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3231 
// haddps (SSE3): horizontal add of packed single-precision floats.
// Encoding: F2 [REX] 0F 7C /r.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3240 
// haddpd (SSE3): horizontal add of packed doubles.
// Encoding: 66 [REX] 0F 7C /r.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3249 
// phsubw (SSSE3): horizontal subtract of packed words.
// Encoding: 66 [REX] 0F 38 05 /r.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3259 
// phsubd (SSSE3): horizontal subtract of packed dwords.
// Encoding: 66 [REX] 0F 38 06 /r.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3269 
// hsubps (SSE3): horizontal subtract of packed single-precision floats.
// Encoding: F2 [REX] 0F 7D /r.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3278 
// hsubpd (SSE3): horizontal subtract of packed doubles.
// Encoding: 66 [REX] 0F 7D /r.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3287 
// pminsb (SSE4.1): minimum of packed signed bytes.
// Encoding: 66 [REX] 0F 38 38 /r.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3297 
// pmaxsb (SSE4.1): maximum of packed signed bytes.
// Encoding: 66 [REX] 0F 38 3C /r.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3307 
pminsw(XmmRegister dst,XmmRegister src)3308 void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
3309   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3310   EmitUint8(0x66);
3311   EmitOptionalRex32(dst, src);
3312   EmitUint8(0x0F);
3313   EmitUint8(0xEA);
3314   EmitXmmRegisterOperand(dst.LowBits(), src);
3315 }
3316 
pmaxsw(XmmRegister dst,XmmRegister src)3317 void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
3318   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3319   EmitUint8(0x66);
3320   EmitOptionalRex32(dst, src);
3321   EmitUint8(0x0F);
3322   EmitUint8(0xEE);
3323   EmitXmmRegisterOperand(dst.LowBits(), src);
3324 }
3325 
pminsd(XmmRegister dst,XmmRegister src)3326 void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
3327   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3328   EmitUint8(0x66);
3329   EmitOptionalRex32(dst, src);
3330   EmitUint8(0x0F);
3331   EmitUint8(0x38);
3332   EmitUint8(0x39);
3333   EmitXmmRegisterOperand(dst.LowBits(), src);
3334 }
3335 
pmaxsd(XmmRegister dst,XmmRegister src)3336 void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
3337   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3338   EmitUint8(0x66);
3339   EmitOptionalRex32(dst, src);
3340   EmitUint8(0x0F);
3341   EmitUint8(0x38);
3342   EmitUint8(0x3D);
3343   EmitXmmRegisterOperand(dst.LowBits(), src);
3344 }
3345 
pminub(XmmRegister dst,XmmRegister src)3346 void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
3347   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3348   EmitUint8(0x66);
3349   EmitOptionalRex32(dst, src);
3350   EmitUint8(0x0F);
3351   EmitUint8(0xDA);
3352   EmitXmmRegisterOperand(dst.LowBits(), src);
3353 }
3354 
pmaxub(XmmRegister dst,XmmRegister src)3355 void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
3356   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3357   EmitUint8(0x66);
3358   EmitOptionalRex32(dst, src);
3359   EmitUint8(0x0F);
3360   EmitUint8(0xDE);
3361   EmitXmmRegisterOperand(dst.LowBits(), src);
3362 }
3363 
pminuw(XmmRegister dst,XmmRegister src)3364 void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
3365   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3366   EmitUint8(0x66);
3367   EmitOptionalRex32(dst, src);
3368   EmitUint8(0x0F);
3369   EmitUint8(0x38);
3370   EmitUint8(0x3A);
3371   EmitXmmRegisterOperand(dst.LowBits(), src);
3372 }
3373 
pmaxuw(XmmRegister dst,XmmRegister src)3374 void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
3375   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3376   EmitUint8(0x66);
3377   EmitOptionalRex32(dst, src);
3378   EmitUint8(0x0F);
3379   EmitUint8(0x38);
3380   EmitUint8(0x3E);
3381   EmitXmmRegisterOperand(dst.LowBits(), src);
3382 }
3383 
pminud(XmmRegister dst,XmmRegister src)3384 void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
3385   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3386   EmitUint8(0x66);
3387   EmitOptionalRex32(dst, src);
3388   EmitUint8(0x0F);
3389   EmitUint8(0x38);
3390   EmitUint8(0x3B);
3391   EmitXmmRegisterOperand(dst.LowBits(), src);
3392 }
3393 
pmaxud(XmmRegister dst,XmmRegister src)3394 void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
3395   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3396   EmitUint8(0x66);
3397   EmitOptionalRex32(dst, src);
3398   EmitUint8(0x0F);
3399   EmitUint8(0x38);
3400   EmitUint8(0x3F);
3401   EmitXmmRegisterOperand(dst.LowBits(), src);
3402 }
3403 
// MINPS xmm1, xmm2 (SSE): 0F 5D /r — packed single-precision minimum.
// No prefix: the unprefixed form is the PS variant.
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// MAXPS xmm1, xmm2 (SSE): 0F 5F /r — packed single-precision maximum.
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// MINPD xmm1, xmm2 (SSE2): 66 0F 5D /r — packed double-precision minimum.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);              // 66 prefix selects the PD variant.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// MAXPD xmm1, xmm2 (SSE2): 66 0F 5F /r — packed double-precision maximum.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3437 
// Packed integer compare family. Equality compares for byte/word/dword are
// SSE2 two-byte opcodes 74/75/76; the quadword forms use the 0F 38 escape
// (PCMPEQQ is SSE4.1, PCMPGTQ is SSE4.2).

// PCMPEQB xmm1, xmm2: 66 0F 74 /r — byte equality compare.
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PCMPEQW xmm1, xmm2: 66 0F 75 /r — word equality compare.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PCMPEQD xmm1, xmm2: 66 0F 76 /r — dword equality compare.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PCMPEQQ xmm1, xmm2 (SSE4.1): 66 0F 38 29 /r — qword equality compare.
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PCMPGTB xmm1, xmm2: 66 0F 64 /r — signed byte greater-than compare.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PCMPGTW xmm1, xmm2: 66 0F 65 /r — signed word greater-than compare.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PCMPGTD xmm1, xmm2: 66 0F 66 /r — signed dword greater-than compare.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PCMPGTQ xmm1, xmm2 (SSE4.2): 66 0F 38 37 /r — signed qword greater-than.
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3511 
// SHUFPD xmm1, xmm2, imm8 (SSE2): 66 0F C6 /r ib.
// Shuffle packed doubles; the trailing imm8 selects source lanes.
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());       // Lane-selector immediate follows ModRM.
}


// SHUFPS xmm1, xmm2, imm8 (SSE): 0F C6 /r ib (no prefix).
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


// PSHUFD xmm1, xmm2, imm8 (SSE2): 66 0F 70 /r ib.
// Shuffle packed dwords by the imm8 selector.
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
3542 
3543 
// Unpack/interleave family (SSE2): 66 0F 6x /r.
// "l" forms interleave the low halves of dst and src, "h" forms the high
// halves, at byte/word/dword/qword granularity.

// PUNPCKLBW xmm1, xmm2: 66 0F 60 /r — interleave low bytes.
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PUNPCKLWD xmm1, xmm2: 66 0F 61 /r — interleave low words.
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PUNPCKLDQ xmm1, xmm2: 66 0F 62 /r — interleave low dwords.
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PUNPCKLQDQ xmm1, xmm2: 66 0F 6C /r — interleave low qwords.
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PUNPCKHBW xmm1, xmm2: 66 0F 68 /r — interleave high bytes.
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PUNPCKHWD xmm1, xmm2: 66 0F 69 /r — interleave high words.
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PUNPCKHDQ xmm1, xmm2: 66 0F 6A /r — interleave high dwords.
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PUNPCKHQDQ xmm1, xmm2: 66 0F 6D /r — interleave high qwords.
void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3622 
3623 
// Immediate-count packed shift family (SSE2). These use the "shift group"
// opcodes 0F 71/72/73 where the ModRM reg field (/2, /4, /6, /3) selects the
// operation; the XMM register goes in the r/m field, hence
// EmitXmmRegisterOperand(<digit>, reg). Only REX.B can ever be needed, so
// EmitOptionalRex(false, false, false, false, reg.NeedsRex()) is used.

// PSLLW xmm, imm8: 66 0F 71 /6 ib — shift words left.
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


// PSLLD xmm, imm8: 66 0F 72 /6 ib — shift dwords left.
void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


// PSLLQ xmm, imm8: 66 0F 73 /6 ib — shift qwords left.
void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


// PSRAW xmm, imm8: 66 0F 71 /4 ib — arithmetic shift words right.
void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}


// PSRAD xmm, imm8: 66 0F 72 /4 ib — arithmetic shift dwords right.
void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}


// PSRLW xmm, imm8: 66 0F 71 /2 ib — logical shift words right.
void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


// PSRLD xmm, imm8: 66 0F 72 /2 ib — logical shift dwords right.
void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


// PSRLQ xmm, imm8: 66 0F 73 /2 ib — logical shift qwords right.
void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


// PSRLDQ xmm, imm8: 66 0F 73 /3 ib — shift the whole register right by
// shift_count BYTES (not bits).
void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);
  EmitUint8(shift_count.value());
}
3730 
3731 
// x87 FPU instructions. The ModRM reg field digit (/0, /2, /3, ...) selects
// the operation within each opcode group.

// FLD m64fp: DD /0 — push a double onto the x87 stack.
void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}


// FST m64fp: DD /2 — store ST(0) as a double (no pop).
void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}


// FSTP m64fp: DD /3 — store ST(0) as a double and pop.
void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}


// FSTSW AX: 9B DF E0 — FWAIT followed by FNSTSW AX; stores the FPU status
// word into AX after pending FP exceptions are handled.
void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}


// FNSTCW m16: D9 /7 — store the FPU control word (no exception check).
void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}


// FLDCW m16: D9 /5 — load the FPU control word.
void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}


// FISTP m64int: DF /7 — store ST(0) as a 64-bit integer and pop.
void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}


// FISTP m32int: DB /3 — store ST(0) as a 32-bit integer and pop.
void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}


// FILD m64int: DF /5 — push a 64-bit integer onto the x87 stack.
void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}


// FILD m32int: DB /0 — push a 32-bit integer onto the x87 stack.
void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}


// FINCSTP: D9 F7 — increment the x87 stack-top pointer.
void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}


// FFREE ST(i): DD C0+i — mark x87 register i as empty.
// NOTE(review): the check rejects index 7 although FFREE encodes ST(0)-ST(7);
// presumably ST7 is intentionally off-limits here — confirm before widening.
void X86_64Assembler::ffree(const Immediate& index) {
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 + index.value());
}


// FSIN: D9 FE — ST(0) = sin(ST(0)).
void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}


// FCOS: D9 FF — ST(0) = cos(ST(0)).
void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}


// FPTAN: D9 F2 — partial tangent of ST(0); pushes 1.0 afterwards.
void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}

// FUCOMPP: DA E9 — unordered compare ST(0) with ST(1), pop both.
void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}


// FPREM: D9 F8 — partial remainder of ST(0) / ST(1).
void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}
3850 
3851 
// Tries to emit an exchange of `dst` and `src` using the one-byte short
// form "XCHG rAX, r" (opcode 0x90 + reg). Returns false (emitting nothing)
// when neither register is RAX, in which case the caller must use the
// general 0x87 encoding. `prefix_fn` emits the caller's choice of prefix
// (e.g. optional REX or mandatory REX.W) for the non-RAX register.
bool X86_64Assembler::try_xchg_rax(CpuRegister dst,
                                   CpuRegister src,
                                   void (X86_64Assembler::*prefix_fn)(CpuRegister)) {
  Register src_reg = src.AsRegister();
  Register dst_reg = dst.AsRegister();
  if (src_reg != RAX && dst_reg != RAX) {
    return false;
  }
  if (dst_reg == RAX) {
    // Normalize so that dst_reg holds the (possibly) non-RAX register.
    std::swap(src_reg, dst_reg);
  }
  if (dst_reg != RAX) {
    // Prefix is needed only if one of the registers is not RAX, otherwise it's a pure NOP.
    (this->*prefix_fn)(CpuRegister(dst_reg));
  }
  // 0x90 + reg low bits; with both operands RAX this degenerates to NOP,
  // which is exactly xchg rax, rax.
  EmitUint8(0x90 + CpuRegister(dst_reg).LowBits());
  return true;
}
3870 
3871 
// XCHG r8, r8: 86 /r. Byte operands go through the byte-register-
// normalizing REX helper so that SPL/BPL/SIL/DIL are encoded (via REX)
// rather than AH/CH/DH/BH.
void X86_64Assembler::xchgb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is no short version for AL.
  EmitOptionalByteRegNormalizingRex32(dst, src, /*normalize_both=*/ true);
  EmitUint8(0x86);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// XCHG r8, m8: 86 /r (xchg with memory is implicitly locked by the CPU).
void X86_64Assembler::xchgb(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x86);
  EmitOperand(reg.LowBits(), address);
}


// XCHG r16, r16: 66 (87 /r), or 66 (90+r) when one operand is AX.
void X86_64Assembler::xchgw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();    // 66 prefix must precede any REX.
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for AX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// XCHG r16, m16: 66 87 /r.
void X86_64Assembler::xchgw(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


// XCHG r32, r32: 87 /r, or 90+r when one operand is EAX.
void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for EAX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// XCHG r32, m32: 87 /r.
void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


// XCHG r64, r64: REX.W 87 /r, or REX.W 90+r when one operand is RAX.
void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitRex64)) {
    // A short version for RAX.
    return;
  }
  // General case.
  EmitRex64(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// XCHG r64, m64: REX.W 87 /r.
void X86_64Assembler::xchgq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3952 
3953 
// XADD family: 0F C0 (byte) / 0F C1 (word/dword/qword), /r.
// Note the operand order: the source register occupies the ModRM reg field
// and the destination is the r/m operand, hence (src, dst) in the REX and
// ModRM emission below.

// XADD r8, r8: 0F C0 /r.
void X86_64Assembler::xaddb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst, /*normalize_both=*/ true);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// XADD m8, r8: 0F C0 /r.
void X86_64Assembler::xaddb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitOperand(reg.LowBits(), address);
}


// XADD r16, r16: 66 0F C1 /r.
void X86_64Assembler::xaddw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// XADD m16, r16: 66 0F C1 /r.
void X86_64Assembler::xaddw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


// XADD r32, r32: 0F C1 /r.
void X86_64Assembler::xaddl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// XADD m32, r32: 0F C1 /r.
void X86_64Assembler::xaddl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


// XADD r64, r64: REX.W 0F C1 /r.
void X86_64Assembler::xaddq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// XADD m64, r64: REX.W 0F C1 /r.
void X86_64Assembler::xaddq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
4026 
4027 
// CMP family. The immediate forms go through EmitComplex with ModRM
// extension /7 (= CMP in the 80/81/83 immediate group), which picks the
// short sign-extended-imm8 encoding when possible.

// CMP m8, imm8: 80 /7 ib.
void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);
  EmitUint8(imm.value() & 0xFF);  // Only the low byte is encoded.
}


// CMP m16, imm: 66 + immediate group, /7.
void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}


// CMP r32, imm: immediate group /7 (81 or 83 chosen by EmitComplex).
void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}


// CMP r32, r32: 3B /r.
void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// CMP r32, m32: 3B /r.
void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// CMP m32, r32: 39 /r (memory is the destination operand).
void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}


// CMP m32, imm: immediate group /7.
void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}


// CMP r64, r64: REX.W 3B /r.
void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// CMP r64, imm32: REX.W + immediate group /7 (imm is sign-extended to 64).
void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}


// CMP r64, m64: REX.W 3B /r.
void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// CMP m64, imm32: REX.W + immediate group /7.
void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}
4117 
4118 
// ADD r32, r32: 03 /r.
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// ADD r32, m32: 03 /r.
void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}
4133 
4134 
// testl reg1, reg2: 32-bit TEST (AND that only sets flags, discards result).
// 0x85 is TEST r/m32, r32.
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// testl reg, [address]: 32-bit TEST against a memory operand.
void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


// testl reg, imm: picks the shortest of three encodings:
//   - byte form (0xA8 for AL, else 0xF6 with register-direct ModRM) when the
//     immediate fits in 8 bits and the register has a legacy byte variant,
//   - TEST EAX, imm32 (0xA9) for RAX,
//   - general TEST r/m32, imm32 (0xF7 /0) otherwise.
void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
  // we only test the byte CpuRegister to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);
    } else {
      EmitUint8(0xF6);
      // Register-direct ModRM, reg field 0 (/0), rm = the register.
      EmitUint8(0xC0 + reg.AsRegister());
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);
    EmitImmediate(immediate);
  } else {
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);
    EmitOperand(0, Operand(reg));
    EmitImmediate(immediate);
  }
}


// testq reg1, reg2: 64-bit TEST (REX.W + 0x85).
void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// testq reg, [address]: 64-bit TEST against a memory operand.
void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}
4191 
4192 
testb(const Address & dst,const Immediate & imm)4193 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
4194   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4195   EmitOptionalRex32(dst);
4196   EmitUint8(0xF6);
4197   EmitOperand(Register::RAX, dst);
4198   CHECK(imm.is_int8());
4199   EmitUint8(imm.value() & 0xFF);
4200 }
4201 
4202 
// testl [dst], imm32: 32-bit TEST of memory against an immediate (0xF7 /0).
void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
4210 
4211 
// andl dst, src: 32-bit AND, register form (0x23 is AND r32, r/m32).
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// andl reg, [address]: 32-bit AND with a memory operand.
void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}


// andl dst, imm: 32-bit AND with an immediate; EmitComplex picks the
// shortest immediate encoding. 4 is AND's /4 opcode extension.
void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);
}


// andq reg, imm: 64-bit AND with a sign-extended 32-bit immediate.
void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}


// andq dst, src: 64-bit AND, register form (REX.W + 0x23).
void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// andq dst, [src]: 64-bit AND with a memory operand.
void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}


// andw [address], imm16: 16-bit AND (0x66 operand-size override prefix).
void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(4, address, imm, /* is_16_op= */ true);
}
4266 
4267 
// orl dst, src: 32-bit OR, register form (0x0B is OR r32, r/m32).
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// orl reg, [address]: 32-bit OR with a memory operand.
void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}


// orl dst, imm: 32-bit OR with an immediate (1 is OR's /1 opcode extension).
void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);
}


// orq dst, imm: 64-bit OR with a sign-extended 32-bit immediate.
void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}


// orq dst, src: 64-bit OR, register form (REX.W + 0x0B).
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// orq dst, [src]: 64-bit OR with a memory operand.
void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}
4313 
4314 
// xorl dst, src: 32-bit XOR, register form (0x33 is XOR r32, r/m32).
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// xorl reg, [address]: 32-bit XOR with a memory operand.
void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}


// xorl dst, imm: 32-bit XOR with an immediate (6 is XOR's /6 extension).
void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);
}


// xorq dst, src: 64-bit XOR, register form (REX.W + 0x33).
void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// xorq dst, imm: 64-bit XOR with a sign-extended 32-bit immediate.
void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}

// xorq dst, [src]: 64-bit XOR with a memory operand.
void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}
4359 
4360 
// NOTE(review): dead code — the entire section below is compiled out by
// "#if 0". It sketches REX-prefix helpers that mutate Register operands in
// place; the live implementation uses the EmitOptionalRex32/EmitRex64 family
// instead. Kept verbatim; consider deleting if truly obsolete.
#if 0
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = mem->rex();
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif
4413 
// addl reg, imm: 32-bit add of an immediate (0 is ADD's /0 extension).
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}


// addq reg, imm: 64-bit add of a sign-extended 32-bit immediate.
void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}


// addq dst, [address]: 64-bit add of a memory operand (REX.W + 0x03).
void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}


// addq dst, src: 64-bit register add.
void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// addl [address], reg: 32-bit add of a register into memory (0x01).
void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}


// addl [address], imm: 32-bit add of an immediate into memory.
void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}


// addw [address], imm16: 16-bit add (0x66 operand-size override prefix).
void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}
4468 
4469 
// subl dst, src: 32-bit subtract, register form (0x2B is SUB r32, r/m32).
void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// subl reg, imm: 32-bit subtract of an immediate (5 is SUB's /5 extension).
void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);
}


// subq reg, imm: 64-bit subtract of a sign-extended 32-bit immediate.
void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);
}


// subq dst, src: 64-bit subtract, register form (REX.W + 0x2B).
void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4499 
4500 
subq(CpuRegister reg,const Address & address)4501 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4502   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4503   EmitRex64(reg, address);
4504   EmitUint8(0x2B);
4505   EmitOperand(reg.LowBits() & 7, address);
4506 }
4507 
4508 
// subl reg, [address]: 32-bit subtract of a memory operand.
void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);
  EmitOperand(reg.LowBits(), address);
}
4515 
4516 
// cdq: sign-extend EAX into EDX:EAX (opcode 0x99).
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


// cqo: sign-extend RAX into RDX:RAX (REX.W + 0x99).
void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();
  EmitUint8(0x99);
}


// idivl reg: signed 32-bit divide of EDX:EAX by reg (0xF7 /7,
// register-direct ModRM emitted as 0xF8 | low bits).
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


// idivq reg: signed 64-bit divide of RDX:RAX by reg.
void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


// divl reg: unsigned 32-bit divide of EDX:EAX by reg (0xF7 /6).
void X86_64Assembler::divl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}


// divq reg: unsigned 64-bit divide of RDX:RAX by reg.
void X86_64Assembler::divq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}
4560 
4561 
// imull dst, src: two-operand signed 32-bit multiply (0x0F 0xAF).
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(dst.LowBits(), Operand(src));
}

// imull dst, src, imm: three-operand signed 32-bit multiply.
// Uses 0x6B (sign-extended imm8) when the immediate fits in a byte,
// otherwise 0x69 (full imm32).
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}


// imull reg, imm: convenience form — multiply reg by imm in place.
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}


// imull reg, [address]: two-operand signed 32-bit multiply with memory.
void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


// imulq dst, src: two-operand signed 64-bit multiply (REX.W + 0x0F 0xAF).
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// imulq reg, imm: convenience form — multiply reg by imm in place.
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}

// imulq dst, reg, imm: three-operand signed 64-bit multiply
// (imm8 short form when possible, as in the 32-bit variant).
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}

// imulq reg, [address]: two-operand signed 64-bit multiply with memory.
void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


// imull reg: one-operand signed 32-bit multiply, EDX:EAX = EAX * reg
// (0xF7 /5).
void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


// imulq reg: one-operand signed 64-bit multiply, RDX:RAX = RAX * reg.
void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


// imull [address]: one-operand signed 32-bit multiply with memory.
void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);
}


// mull reg: one-operand unsigned 32-bit multiply (0xF7 /4).
void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(4, Operand(reg));
}


// mull [address]: one-operand unsigned 32-bit multiply with memory.
void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);
}
4687 
4688 
// Shift/rotate wrappers around EmitGenericShift. The first argument selects
// 64-bit operand size (REX.W); the second is the ModRM reg-field opcode
// extension: 4 = SHL, 5 = SHR, 7 = SAR, 0 = ROL, 1 = ROR. The shifter
// register variants shift by CL.
void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
4787 
4788 
// negl reg: 32-bit two's-complement negate (0xF7 /3).
void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// negq reg: 64-bit two's-complement negate (REX.W + 0xF7 /3).
void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// notl reg: 32-bit bitwise complement (0xF7 /2, register-direct ModRM
// emitted directly as 0xD0 | low bits).
void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());
}
4811 
4812 
notq(CpuRegister reg)4813 void X86_64Assembler::notq(CpuRegister reg) {
4814   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4815   EmitRex64(reg);
4816   EmitUint8(0xF7);
4817   EmitOperand(2, Operand(reg));
4818 }
4819 
4820 
// enter imm16: ENTER with the given frame size and nesting level 0
// (0xC8 imm16 imm8).
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);  // Nesting level 0.
}


// leave: 0xC9.
void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


// ret: near return (0xC3).
void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


// ret imm16: near return popping imm16 extra bytes (0xC2 imm16).
void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
}



// nop: 0x90.
void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


// int3: breakpoint trap (0xCC).
void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


// hlt: 0xF4.
void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
4869 
4870 
// j<cc> label: conditional jump. For a bound label (backward jump only —
// CHECK_LE enforces a non-positive offset) the shortest encoding is chosen:
// 2-byte rel8 (0x70+cc) if the displacement fits in a signed byte, else
// 6-byte rel32 (0x0F 0x80+cc). Unbound labels always get the long form so
// the link can be patched later.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 6;
    // Displacement is relative to the end of the jump instruction.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}
4892 
4893 
// j<cc> near-label: conditional jump that is guaranteed to use the 2-byte
// rel8 form; a bound target out of rel8 range is a programming error
// (CHECK fails).
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}
4908 
4909 
// jrcxz near-label: jump if RCX is zero (0xE3 rel8); rel8-only, so the
// target must be a NearLabel within signed-byte range.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
4924 
4925 
// jmp reg: indirect jump through a register (0xFF /4).
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());
}

// jmp [address]: indirect jump through memory (0xFF /4).
void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}

// jmp label: direct jump. Bound (backward) targets get the shortest
// encoding: 2-byte rel8 (0xEB) if it fits, else 5-byte rel32 (0xE9);
// unbound labels always use the patchable long form.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 5;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}


// jmp near-label: rel8-only direct jump (0xEB); bound targets must be in
// signed-byte range.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
4975 
4976 
// rep movsw: word string copy (0x66 operand-size prefix + 0xF3 rep + 0xA5).
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}

// rep movsb: byte string copy (0xF3 0xA4).
void X86_64Assembler::rep_movsb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA4);
}

// rep movsl: doubleword string copy (0xF3 0xA5, no size prefix).
void X86_64Assembler::rep_movsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}
4995 
// Emits the LOCK prefix (0xF0) and returns this assembler so the locked
// instruction can be chained, e.g. assembler->lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}
5001 
5002 
// cmpxchgb [address], reg: byte compare-and-exchange (0x0F 0xB0). The byte
// register normalizing REX forces a REX prefix for SPL/BPL/SIL/DIL so the
// encoding doesn't alias AH/CH/DH/BH.
void X86_64Assembler::cmpxchgb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB0);
  EmitOperand(reg.LowBits(), address);
}


// cmpxchgw [address], reg: 16-bit compare-and-exchange (0x66 prefix +
// 0x0F 0xB1).
void X86_64Assembler::cmpxchgw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// cmpxchgl [address], reg: 32-bit compare-and-exchange (0x0F 0xB1).
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// cmpxchgq [address], reg: 64-bit compare-and-exchange (REX.W + 0x0F 0xB1).
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}
5038 
5039 
// mfence: full memory fence (0x0F 0xAE 0xF0).
void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}
5046 
5047 
// Emits the GS segment-override prefix (0x65) and returns this assembler
// for chaining with the prefixed instruction.
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
5054 
5055 
AddImmediate(CpuRegister reg,const Immediate & imm)5056 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
5057   int value = imm.value();
5058   if (value != 0) {
5059     if (value > 0) {
5060       addl(reg, imm);
5061     } else {
5062       subl(reg, Immediate(value));
5063     }
5064   }
5065 }
5066 
5067 
// set<cc> dst: set the byte register to 0/1 from a condition (0x0F 0x90+cc).
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  // Register-direct ModRM; the reg field is ignored by SETcc.
  EmitUint8(0xC0 + dst.LowBits());
}
5078 
// blsi dst, src (BMI1): extract the lowest set bit of src into dst.
// Three-byte VEX encoding, opcode 0xF3 with ModRM reg field /3; the
// destination goes in VEX.vvvv (byte two). W=1 selects 64-bit operands.
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(3, src.LowBits());
}

// blsmsk dst, src (BMI1): mask up to (and including) the lowest set bit of
// src. Same VEX encoding as blsi but ModRM reg field /2.
void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(2, src.LowBits());
}

// blsr dst, src (BMI1): copy src to dst with its lowest set bit cleared.
// Same VEX encoding as blsi but ModRM reg field /1.
void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(1, src.LowBits());
}
5132 
bswapl(CpuRegister dst)5133 void X86_64Assembler::bswapl(CpuRegister dst) {
5134   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5135   EmitOptionalRex(false, false, false, false, dst.NeedsRex());
5136   EmitUint8(0x0F);
5137   EmitUint8(0xC8 + dst.LowBits());
5138 }
5139 
bswapq(CpuRegister dst)5140 void X86_64Assembler::bswapq(CpuRegister dst) {
5141   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5142   EmitOptionalRex(false, true, false, false, dst.NeedsRex());
5143   EmitUint8(0x0F);
5144   EmitUint8(0xC8 + dst.LowBits());
5145 }
5146 
bsfl(CpuRegister dst,CpuRegister src)5147 void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
5148   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5149   EmitOptionalRex32(dst, src);
5150   EmitUint8(0x0F);
5151   EmitUint8(0xBC);
5152   EmitRegisterOperand(dst.LowBits(), src.LowBits());
5153 }
5154 
bsfl(CpuRegister dst,const Address & src)5155 void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
5156   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5157   EmitOptionalRex32(dst, src);
5158   EmitUint8(0x0F);
5159   EmitUint8(0xBC);
5160   EmitOperand(dst.LowBits(), src);
5161 }
5162 
bsfq(CpuRegister dst,CpuRegister src)5163 void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
5164   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5165   EmitRex64(dst, src);
5166   EmitUint8(0x0F);
5167   EmitUint8(0xBC);
5168   EmitRegisterOperand(dst.LowBits(), src.LowBits());
5169 }
5170 
bsfq(CpuRegister dst,const Address & src)5171 void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
5172   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5173   EmitRex64(dst, src);
5174   EmitUint8(0x0F);
5175   EmitUint8(0xBC);
5176   EmitOperand(dst.LowBits(), src);
5177 }
5178 
bsrl(CpuRegister dst,CpuRegister src)5179 void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
5180   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5181   EmitOptionalRex32(dst, src);
5182   EmitUint8(0x0F);
5183   EmitUint8(0xBD);
5184   EmitRegisterOperand(dst.LowBits(), src.LowBits());
5185 }
5186 
bsrl(CpuRegister dst,const Address & src)5187 void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
5188   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5189   EmitOptionalRex32(dst, src);
5190   EmitUint8(0x0F);
5191   EmitUint8(0xBD);
5192   EmitOperand(dst.LowBits(), src);
5193 }
5194 
bsrq(CpuRegister dst,CpuRegister src)5195 void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
5196   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5197   EmitRex64(dst, src);
5198   EmitUint8(0x0F);
5199   EmitUint8(0xBD);
5200   EmitRegisterOperand(dst.LowBits(), src.LowBits());
5201 }
5202 
bsrq(CpuRegister dst,const Address & src)5203 void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
5204   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5205   EmitRex64(dst, src);
5206   EmitUint8(0x0F);
5207   EmitUint8(0xBD);
5208   EmitOperand(dst.LowBits(), src);
5209 }
5210 
popcntl(CpuRegister dst,CpuRegister src)5211 void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
5212   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5213   EmitUint8(0xF3);
5214   EmitOptionalRex32(dst, src);
5215   EmitUint8(0x0F);
5216   EmitUint8(0xB8);
5217   EmitRegisterOperand(dst.LowBits(), src.LowBits());
5218 }
5219 
popcntl(CpuRegister dst,const Address & src)5220 void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
5221   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5222   EmitUint8(0xF3);
5223   EmitOptionalRex32(dst, src);
5224   EmitUint8(0x0F);
5225   EmitUint8(0xB8);
5226   EmitOperand(dst.LowBits(), src);
5227 }
5228 
popcntq(CpuRegister dst,CpuRegister src)5229 void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
5230   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5231   EmitUint8(0xF3);
5232   EmitRex64(dst, src);
5233   EmitUint8(0x0F);
5234   EmitUint8(0xB8);
5235   EmitRegisterOperand(dst.LowBits(), src.LowBits());
5236 }
5237 
popcntq(CpuRegister dst,const Address & src)5238 void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
5239   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5240   EmitUint8(0xF3);
5241   EmitRex64(dst, src);
5242   EmitUint8(0x0F);
5243   EmitUint8(0xB8);
5244   EmitOperand(dst.LowBits(), src);
5245 }
5246 
repne_scasb()5247 void X86_64Assembler::repne_scasb() {
5248   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5249   EmitUint8(0xF2);
5250   EmitUint8(0xAE);
5251 }
5252 
repne_scasw()5253 void X86_64Assembler::repne_scasw() {
5254   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5255   EmitUint8(0x66);
5256   EmitUint8(0xF2);
5257   EmitUint8(0xAF);
5258 }
5259 
repe_cmpsw()5260 void X86_64Assembler::repe_cmpsw() {
5261   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5262   EmitUint8(0x66);
5263   EmitUint8(0xF3);
5264   EmitUint8(0xA7);
5265 }
5266 
5267 
repe_cmpsl()5268 void X86_64Assembler::repe_cmpsl() {
5269   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5270   EmitUint8(0xF3);
5271   EmitUint8(0xA7);
5272 }
5273 
5274 
repe_cmpsq()5275 void X86_64Assembler::repe_cmpsq() {
5276   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5277   EmitUint8(0xF3);
5278   EmitRex64();
5279   EmitUint8(0xA7);
5280 }
5281 
ud2()5282 void X86_64Assembler::ud2() {
5283   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5284   EmitUint8(0x0F);
5285   EmitUint8(0x0B);
5286 }
5287 
LoadDoubleConstant(XmmRegister dst,double value)5288 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
5289   // TODO: Need to have a code constants table.
5290   int64_t constant = bit_cast<int64_t, double>(value);
5291   pushq(Immediate(High32Bits(constant)));
5292   pushq(Immediate(Low32Bits(constant)));
5293   movsd(dst, Address(CpuRegister(RSP), 0));
5294   addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
5295 }
5296 
5297 
Align(int alignment,int offset)5298 void X86_64Assembler::Align(int alignment, int offset) {
5299   CHECK(IsPowerOfTwo(alignment));
5300   // Emit nop instruction until the real position is aligned.
5301   while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
5302     nop();
5303   }
5304 }
5305 
5306 
Bind(Label * label)5307 void X86_64Assembler::Bind(Label* label) {
5308   int bound = buffer_.Size();
5309   CHECK(!label->IsBound());  // Labels can only be bound once.
5310   while (label->IsLinked()) {
5311     int position = label->LinkPosition();
5312     int next = buffer_.Load<int32_t>(position);
5313     buffer_.Store<int32_t>(position, bound - (position + 4));
5314     label->position_ = next;
5315   }
5316   label->BindTo(bound);
5317 }
5318 
5319 
// Binds a NearLabel (8-bit branch offsets). Forward references are chained
// through the single displacement byte: each link stores the distance back
// to the previous link, and zero terminates the chain.
void X86_64Assembler::Bind(NearLabel* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    // Read the stored delta to the previous link before overwriting the byte.
    uint8_t delta = buffer_.Load<uint8_t>(position);
    int offset = bound - (position + 1);
    CHECK(IsInt<8>(offset));  // Near branches can only span +/-127 bytes.
    buffer_.Store<int8_t>(position, offset);
    // Step to the previous link; a zero delta marks the end of the chain.
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
5333 
5334 
EmitOperand(uint8_t reg_or_opcode,const Operand & operand)5335 void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
5336   CHECK_GE(reg_or_opcode, 0);
5337   CHECK_LT(reg_or_opcode, 8);
5338   const int length = operand.length_;
5339   CHECK_GT(length, 0);
5340   // Emit the ModRM byte updated with the given reg value.
5341   CHECK_EQ(operand.encoding_[0] & 0x38, 0);
5342   EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
5343   // Emit the rest of the encoded operand.
5344   for (int i = 1; i < length; i++) {
5345     EmitUint8(operand.encoding_[i]);
5346   }
5347   AssemblerFixup* fixup = operand.GetFixup();
5348   if (fixup != nullptr) {
5349     EmitFixup(fixup);
5350   }
5351 }
5352 
5353 
EmitImmediate(const Immediate & imm,bool is_16_op)5354 void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
5355   if (is_16_op) {
5356     EmitUint8(imm.value() & 0xFF);
5357     EmitUint8(imm.value() >> 8);
5358   } else if (imm.is_int32()) {
5359     EmitInt32(static_cast<int32_t>(imm.value()));
5360   } else {
5361     EmitInt64(imm.value());
5362   }
5363 }
5364 
5365 
// Emits an ALU-group instruction (ADD/OR/ADC/... selected by `reg_or_opcode`)
// with an immediate operand, choosing the shortest available encoding.
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate (opcode 0x83 /reg ib).
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax (accumulator form, no ModRM).
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    // General form: 0x81 /reg imm.
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
5387 
5388 
EmitLabel(Label * label,int instruction_size)5389 void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
5390   if (label->IsBound()) {
5391     int offset = label->Position() - buffer_.Size();
5392     CHECK_LE(offset, 0);
5393     EmitInt32(offset - instruction_size);
5394   } else {
5395     EmitLabelLink(label);
5396   }
5397 }
5398 
5399 
EmitLabelLink(Label * label)5400 void X86_64Assembler::EmitLabelLink(Label* label) {
5401   CHECK(!label->IsBound());
5402   int position = buffer_.Size();
5403   EmitInt32(label->position_);
5404   label->LinkTo(position);
5405 }
5406 
5407 
// Links a forward reference to a NearLabel. Only one displacement byte is
// available, so the chain stores the distance to the previous link (zero
// terminates it); Bind(NearLabel*) walks the chain backwards.
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));  // Links further apart than 255 bytes can't chain.
    EmitUint8(delta & 0xFF);
  } else {
    // First reference: a zero delta marks the end of the chain.
    EmitUint8(0);
  }
  label->LinkTo(position);
}
5421 
5422 
EmitGenericShift(bool wide,int reg_or_opcode,CpuRegister reg,const Immediate & imm)5423 void X86_64Assembler::EmitGenericShift(bool wide,
5424                                        int reg_or_opcode,
5425                                        CpuRegister reg,
5426                                        const Immediate& imm) {
5427   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5428   CHECK(imm.is_int8());
5429   if (wide) {
5430     EmitRex64(reg);
5431   } else {
5432     EmitOptionalRex32(reg);
5433   }
5434   if (imm.value() == 1) {
5435     EmitUint8(0xD1);
5436     EmitOperand(reg_or_opcode, Operand(reg));
5437   } else {
5438     EmitUint8(0xC1);
5439     EmitOperand(reg_or_opcode, Operand(reg));
5440     EmitUint8(imm.value() & 0xFF);
5441   }
5442 }
5443 
5444 
EmitGenericShift(bool wide,int reg_or_opcode,CpuRegister operand,CpuRegister shifter)5445 void X86_64Assembler::EmitGenericShift(bool wide,
5446                                        int reg_or_opcode,
5447                                        CpuRegister operand,
5448                                        CpuRegister shifter) {
5449   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5450   CHECK_EQ(shifter.AsRegister(), RCX);
5451   if (wide) {
5452     EmitRex64(operand);
5453   } else {
5454     EmitOptionalRex32(operand);
5455   }
5456   EmitUint8(0xD3);
5457   EmitOperand(reg_or_opcode, Operand(operand));
5458 }
5459 
EmitOptionalRex(bool force,bool w,bool r,bool x,bool b)5460 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
5461   // REX.WRXB
5462   // W - 64-bit operand
5463   // R - MODRM.reg
5464   // X - SIB.index
5465   // B - MODRM.rm/SIB.base
5466   uint8_t rex = force ? 0x40 : 0;
5467   if (w) {
5468     rex |= 0x48;  // REX.W000
5469   }
5470   if (r) {
5471     rex |= 0x44;  // REX.0R00
5472   }
5473   if (x) {
5474     rex |= 0x42;  // REX.00X0
5475   }
5476   if (b) {
5477     rex |= 0x41;  // REX.000B
5478   }
5479   if (rex != 0) {
5480     EmitUint8(rex);
5481   }
5482 }
5483 
// 32-bit op, single register in the rm slot: only REX.B may be needed.
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}
5487 
// 32-bit op, register-register form: dst -> REX.R, src -> REX.B.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5491 
// XMM-XMM form: dst -> REX.R, src -> REX.B.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5495 
// GPR destination, XMM source: dst -> REX.R, src -> REX.B.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5499 
// XMM destination, GPR source: dst -> REX.R, src -> REX.B.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5503 
EmitOptionalRex32(const Operand & operand)5504 void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
5505   uint8_t rex = operand.rex();
5506   if (rex != 0) {
5507     EmitUint8(rex);
5508   }
5509 }
5510 
EmitOptionalRex32(CpuRegister dst,const Operand & operand)5511 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
5512   uint8_t rex = operand.rex();
5513   if (dst.NeedsRex()) {
5514     rex |= 0x44;  // REX.0R00
5515   }
5516   if (rex != 0) {
5517     EmitUint8(rex);
5518   }
5519 }
5520 
EmitOptionalRex32(XmmRegister dst,const Operand & operand)5521 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
5522   uint8_t rex = operand.rex();
5523   if (dst.NeedsRex()) {
5524     rex |= 0x44;  // REX.0R00
5525   }
5526   if (rex != 0) {
5527     EmitUint8(rex);
5528   }
5529 }
5530 
// Bare REX.W prefix (0x48) for 64-bit operations with no extended registers.
void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}
5534 
// REX.W plus REX.B if `reg` (in the rm slot) is R8-R15.
void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}
5538 
EmitRex64(const Operand & operand)5539 void X86_64Assembler::EmitRex64(const Operand& operand) {
5540   uint8_t rex = operand.rex();
5541   rex |= 0x48;  // REX.W000
5542   EmitUint8(rex);
5543 }
5544 
// 64-bit register-register form: REX.W, dst -> REX.R, src -> REX.B.
void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5548 
// 64-bit op, XMM destination, GPR source: REX.W, dst -> REX.R, src -> REX.B.
void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5552 
// 64-bit op, GPR destination, XMM source: REX.W, dst -> REX.R, src -> REX.B.
void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5556 
EmitRex64(CpuRegister dst,const Operand & operand)5557 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
5558   uint8_t rex = 0x48 | operand.rex();  // REX.W000
5559   if (dst.NeedsRex()) {
5560     rex |= 0x44;  // REX.0R00
5561   }
5562   EmitUint8(rex);
5563 }
5564 
EmitRex64(XmmRegister dst,const Operand & operand)5565 void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
5566   uint8_t rex = 0x48 | operand.rex();  // REX.W000
5567   if (dst.NeedsRex()) {
5568     rex |= 0x44;  // REX.0R00
5569   }
5570   EmitUint8(rex);
5571 }
5572 
EmitOptionalByteRegNormalizingRex32(CpuRegister dst,CpuRegister src,bool normalize_both)5573 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
5574                                                           CpuRegister src,
5575                                                           bool normalize_both) {
5576   // SPL, BPL, SIL, DIL need the REX prefix.
5577   bool force = src.AsRegister() > 3;
5578   if (normalize_both) {
5579     // Some instructions take two byte registers, such as `xchg bpl, al`, so they need the REX
5580     // prefix if either `src` or `dst` needs it.
5581     force |= dst.AsRegister() > 3;
5582   } else {
5583     // Other instructions take one byte register and one full register, such as `movzxb rax, bpl`.
5584     // They need REX prefix only if `src` needs it, but not `dst`.
5585   }
5586   EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
5587 }
5588 
EmitOptionalByteRegNormalizingRex32(CpuRegister dst,const Operand & operand)5589 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
5590   uint8_t rex = operand.rex();
5591   // For dst, SPL, BPL, SIL, DIL need the rex prefix.
5592   bool force = dst.AsRegister() > 3;
5593   if (force) {
5594     rex |= 0x40;  // REX.0000
5595   }
5596   if (dst.NeedsRex()) {
5597     rex |= 0x44;  // REX.0R00
5598   }
5599   if (rex != 0) {
5600     EmitUint8(rex);
5601   }
5602 }
5603 
AddConstantArea()5604 void X86_64Assembler::AddConstantArea() {
5605   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
5606   for (size_t i = 0, e = area.size(); i < e; i++) {
5607     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5608     EmitInt32(area[i]);
5609   }
5610 }
5611 
AppendInt32(int32_t v)5612 size_t ConstantArea::AppendInt32(int32_t v) {
5613   size_t result = buffer_.size() * elem_size_;
5614   buffer_.push_back(v);
5615   return result;
5616 }
5617 
AddInt32(int32_t v)5618 size_t ConstantArea::AddInt32(int32_t v) {
5619   // Look for an existing match.
5620   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5621     if (v == buffer_[i]) {
5622       return i * elem_size_;
5623     }
5624   }
5625 
5626   // Didn't match anything.
5627   return AppendInt32(v);
5628 }
5629 
AddInt64(int64_t v)5630 size_t ConstantArea::AddInt64(int64_t v) {
5631   int32_t v_low = v;
5632   int32_t v_high = v >> 32;
5633   if (buffer_.size() > 1) {
5634     // Ensure we don't pass the end of the buffer.
5635     for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
5636       if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
5637         return i * elem_size_;
5638       }
5639     }
5640   }
5641 
5642   // Didn't match anything.
5643   size_t result = buffer_.size() * elem_size_;
5644   buffer_.push_back(v_low);
5645   buffer_.push_back(v_high);
5646   return result;
5647 }
5648 
// Adds a double constant, returning its byte offset in the constant area.
size_t ConstantArea::AddDouble(double v) {
  // Treat the value as a 64-bit integer value.
  return AddInt64(bit_cast<int64_t, double>(v));
}
5653 
// Adds a float constant, returning its byte offset in the constant area.
size_t ConstantArea::AddFloat(float v) {
  // Treat the value as a 32-bit integer value.
  return AddInt32(bit_cast<int32_t, float>(v));
}
5658 
EmitVexPrefixByteZero(bool is_twobyte_form)5659 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5660   // Vex Byte 0,
5661   // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5662   // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5663   uint8_t vex_prefix = 0xC0;
5664   if (is_twobyte_form) {
5665     vex_prefix |= TWO_BYTE_VEX;  // 2-Byte Vex
5666   } else {
5667     vex_prefix |= THREE_BYTE_VEX;  // 3-Byte Vex
5668   }
5669   return vex_prefix;
5670 }
5671 
EmitVexPrefixByteOne(bool R,bool X,bool B,int SET_VEX_M)5672 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
5673   // Vex Byte 1,
5674   uint8_t vex_prefix = VEX_INIT;
5675   /** Bit[7] This bit needs to be set to '1'
5676   otherwise the instruction is LES or LDS */
5677   if (!R) {
5678     // R .
5679     vex_prefix |= SET_VEX_R;
5680   }
5681   /** Bit[6] This bit needs to be set to '1'
5682   otherwise the instruction is LES or LDS */
5683   if (!X) {
5684     // X .
5685     vex_prefix |= SET_VEX_X;
5686   }
5687   /** Bit[5] This bit needs to be set to '1' */
5688   if (!B) {
5689     // B .
5690     vex_prefix |= SET_VEX_B;
5691   }
5692   /** Bits[4:0], Based on the instruction documentaion */
5693   vex_prefix |= SET_VEX_M;
5694   return vex_prefix;
5695 }
5696 
EmitVexPrefixByteOne(bool R,X86_64ManagedRegister operand,int SET_VEX_L,int SET_VEX_PP)5697 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
5698                                               X86_64ManagedRegister operand,
5699                                               int SET_VEX_L,
5700                                               int SET_VEX_PP) {
5701   // Vex Byte 1,
5702   uint8_t vex_prefix = VEX_INIT;
5703   /** Bit[7] This bit needs to be set to '1'
5704   otherwise the instruction is LES or LDS */
5705   if (!R) {
5706     // R .
5707     vex_prefix |= SET_VEX_R;
5708   }
5709   /**Bits[6:3] - 'vvvv' the source or dest register specifier */
5710   if (operand.IsNoRegister()) {
5711     vex_prefix |= 0x78;
5712   } else if (operand.IsXmmRegister()) {
5713     XmmRegister vvvv = operand.AsXmmRegister();
5714     int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
5715     uint8_t reg = static_cast<uint8_t>(inverted_reg);
5716     vex_prefix |= ((reg & 0x0F) << 3);
5717   } else if (operand.IsCpuRegister()) {
5718     CpuRegister vvvv = operand.AsCpuRegister();
5719     int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
5720     uint8_t reg = static_cast<uint8_t>(inverted_reg);
5721     vex_prefix |= ((reg & 0x0F) << 3);
5722   }
5723   /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5724   VEX.L = 0 indicates 128 bit vector operation */
5725   vex_prefix |= SET_VEX_L;
5726   // Bits[1:0] -  "pp"
5727   vex_prefix |= SET_VEX_PP;
5728   return vex_prefix;
5729 }
5730 
EmitVexPrefixByteTwo(bool W,X86_64ManagedRegister operand,int SET_VEX_L,int SET_VEX_PP)5731 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5732                                               X86_64ManagedRegister operand,
5733                                               int SET_VEX_L,
5734                                               int SET_VEX_PP) {
5735   // Vex Byte 2,
5736   uint8_t vex_prefix = VEX_INIT;
5737 
5738   /** Bit[7] This bits needs to be set to '1' with default value.
5739   When using C4H form of VEX prefix, REX.W value is ignored */
5740   if (W) {
5741     vex_prefix |= SET_VEX_W;
5742   }
5743   // Bits[6:3] - 'vvvv' the source or dest register specifier
5744   if (operand.IsXmmRegister()) {
5745     XmmRegister vvvv = operand.AsXmmRegister();
5746     int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
5747     uint8_t reg = static_cast<uint8_t>(inverted_reg);
5748     vex_prefix |= ((reg & 0x0F) << 3);
5749   } else if (operand.IsCpuRegister()) {
5750     CpuRegister vvvv = operand.AsCpuRegister();
5751     int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
5752     uint8_t reg = static_cast<uint8_t>(inverted_reg);
5753     vex_prefix |= ((reg & 0x0F) << 3);
5754   }
5755   /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5756   VEX.L = 0 indicates 128 bit vector operation */
5757   vex_prefix |= SET_VEX_L;
5758   // Bits[1:0] -  "pp"
5759   vex_prefix |= SET_VEX_PP;
5760   return vex_prefix;
5761 }
5762 
EmitVexPrefixByteTwo(bool W,int SET_VEX_L,int SET_VEX_PP)5763 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5764                                               int SET_VEX_L,
5765                                               int SET_VEX_PP) {
5766   // Vex Byte 2,
5767   uint8_t vex_prefix = VEX_INIT;
5768 
5769   /** Bit[7] This bits needs to be set to '1' with default value.
5770   When using C4H form of VEX prefix, REX.W value is ignored */
5771   if (W) {
5772     vex_prefix |= SET_VEX_W;
5773   }
5774   /** Bits[6:3] - 'vvvv' the source or dest register specifier */
5775   vex_prefix |= (0x0F << 3);
5776   /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5777   VEX.L = 0 indicates 128 bit vector operation */
5778   vex_prefix |= SET_VEX_L;
5779 
5780   // Bits[1:0] -  "pp"
5781   if (SET_VEX_PP != SET_VEX_PP_NONE) {
5782     vex_prefix |= SET_VEX_PP;
5783   }
5784   return vex_prefix;
5785 }
5786 
5787 }  // namespace x86_64
5788 }  // namespace art
5789