• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "assembler_x86_64.h"
18 
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23 
24 namespace art HIDDEN {
25 namespace x86_64 {
26 
operator <<(std::ostream & os,const CpuRegister & reg)27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
28   return os << reg.AsRegister();
29 }
30 
operator <<(std::ostream & os,const XmmRegister & reg)31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
32   return os << reg.AsFloatRegister();
33 }
34 
operator <<(std::ostream & os,const X87Register & reg)35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
36   return os << "ST" << static_cast<int>(reg);
37 }
38 
// Pretty-prints an Address operand in AT&T-like syntax for debug output.
// Dispatches on the ModRM `mod` field: 0 => no displacement, 1 => disp8,
// 2 => disp32; any other value prints "<address?>".
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  switch (addr.mod()) {
    case 0:
      // rm != RSP: no SIB byte, plain register-indirect. index == RSP in a
      // SIB byte conventionally encodes "no index" — also printed plain.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << "(%" << addr.cpu_rm() << ")";
      } else if (addr.base() == RBP) {
        // mod == 0 with base == RBP: disp32 + scaled index, no base register.
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      // base + scaled index, no displacement.
      return os << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 1:
      // disp8 forms, with or without a SIB index.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 2:
      // disp32 forms, with or without a SIB index.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    default:
      // mod == 3 is a register operand, not a memory address.
      return os << "<address?>";
  }
}
66 
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68   if (has_AVX_ || has_AVX2_) {
69     return true;
70   }
71   return false;
72 }
73 
74 
// call r64: opcode 0xFF, ModRM reg-field /2. REX.B added only if needed.
void X86_64Assembler::call(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(2, reg.LowBits());
}
81 
82 
// call m64: opcode 0xFF, ModRM reg-field /2, memory operand.
void X86_64Assembler::call(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(2, address);
}
89 
90 
// call rel32: opcode 0xE8 followed by a 32-bit relative offset, bound to
// `label` (5 bytes total).
void X86_64Assembler::call(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xE8);
  static const int kSize = 5;
  // Offset by one because we already have emitted the opcode.
  EmitLabel(label, kSize - 1);
}
98 
// push r64: single-byte opcode 0x50 + low 3 register bits; REX.B for r8-r15.
void X86_64Assembler::pushq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x50 + reg.LowBits());
}
104 
105 
// push m64: opcode 0xFF, ModRM reg-field /6.
void X86_64Assembler::pushq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(6, address);
}
112 
113 
// push imm: 0x6A for a sign-extended imm8, otherwise 0x68 with imm32.
void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (imm.is_int8()) {
    EmitUint8(0x6A);
    EmitUint8(imm.value() & 0xFF);
  } else {
    EmitUint8(0x68);
    EmitImmediate(imm);
  }
}
125 
126 
// pop r64: single-byte opcode 0x58 + low 3 register bits; REX.B for r8-r15.
void X86_64Assembler::popq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x58 + reg.LowBits());
}
132 
133 
// pop m64: opcode 0x8F, ModRM reg-field /0.
void X86_64Assembler::popq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(0, address);
}
140 
141 
// mov r64, imm: uses the short REX.W C7 /0 form with a sign-extended imm32
// when the value fits, otherwise the 10-byte REX.W B8+r movabs with imm64.
void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (imm.is_int32()) {
    // 32 bit. Note: sign-extends.
    EmitRex64(dst);
    EmitUint8(0xC7);
    EmitRegisterOperand(0, dst.LowBits());
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitRex64(dst);
    EmitUint8(0xB8 + dst.LowBits());
    EmitInt64(imm.value());
  }
}
156 
157 
// mov r32, imm32: opcode 0xB8 + low register bits, followed by imm32.
void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}
165 
166 
// mov m64, imm32 (sign-extended): REX.W + 0xC7 /0.
void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
175 
176 
// mov r64, r64 using the MR (store) form.
void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
184 
185 
// mov r32, r32 using the RM (load) form, opcode 0x8B.
void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
192 
193 
// mov r64, m64: REX.W + 0x8B (load form).
void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
200 
201 
// mov r32, m32: opcode 0x8B (load form).
void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
208 
209 
// mov m64, r64: REX.W + 0x89 (store form).
void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
216 
217 
// mov m32, r32: opcode 0x89 (store form).
void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
224 
// mov m32, imm32: opcode 0xC7, ModRM reg-field /0.
void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
232 
// movnti m32, r32 (non-temporal store): 0x0F 0xC3.
void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
240 
// movnti m64, r64 (non-temporal store): REX.W + 0x0F 0xC3.
void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
248 
cmov(Condition c,CpuRegister dst,CpuRegister src)249 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
250   cmov(c, dst, src, true);
251 }
252 
// cmovcc r, r: 0x0F, 0x40 + condition code. REX.W selects 64-bit width.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
260 
261 
// cmovcc r, m: 0x0F, 0x40 + condition code, memory source.
void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (is64bit) {
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitOperand(dst.LowBits(), src);
}
273 
274 
// movzx r32, r8 (zero-extend byte): 0x0F 0xB6. The byte-reg-normalizing REX
// forces a REX prefix where needed so spl/bpl/sil/dil are addressable.
void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
282 
283 
// movzx r32, m8 (zero-extend byte load): 0x0F 0xB6.
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}
293 
294 
// movsx r32, r8 (sign-extend byte): 0x0F 0xBE.
void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
302 
303 
// movsx r32, m8 (sign-extend byte load): 0x0F 0xBE.
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}
313 
314 
// Deliberately unimplemented: byte loads must specify an extension mode.
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
318 
319 
// mov m8, r8 (byte store): opcode 0x88.
void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}
326 
327 
movb(const Address & dst,const Immediate & imm)328 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
329   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
330   EmitOptionalRex32(dst);
331   EmitUint8(0xC6);
332   EmitOperand(Register::RAX, dst);
333   CHECK(imm.is_int8());
334   EmitUint8(imm.value() & 0xFF);
335 }
336 
337 
// movzx r32, r16 (zero-extend word): 0x0F 0xB7.
void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
345 
346 
// movzx r32, m16 (zero-extend word load): 0x0F 0xB7.
void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}
354 
355 
// movsx r32, r16 (sign-extend word): 0x0F 0xBF.
void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
363 
364 
// movsx r32, m16 (sign-extend word load): 0x0F 0xBF.
void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}
372 
373 
// Deliberately unimplemented: word loads must specify an extension mode.
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
377 
378 
// mov m16, r16: 0x66 operand-size prefix + 0x89 (store form).
void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
386 
387 
movw(const Address & dst,const Immediate & imm)388 void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
389   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
390   EmitOperandSizeOverride();
391   EmitOptionalRex32(dst);
392   EmitUint8(0xC7);
393   EmitOperand(Register::RAX, dst);
394   CHECK(imm.is_uint16() || imm.is_int16());
395   EmitUint8(imm.value() & 0xFF);
396   EmitUint8(imm.value() >> 8);
397 }
398 
399 
// lea r64, m: REX.W + 0x8D.
void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
406 
407 
// lea r32, m: opcode 0x8D.
void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
414 
415 
// movaps xmm, xmm: prefers the VEX encoding when AVX/AVX2 is available,
// otherwise emits legacy SSE 0x0F 0x28.
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
427 
428 
429 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t byte_zero, byte_one, byte_two;
  bool is_twobyte_form = true;
  // "load" here selects the 0x28 (load) opcode form, which places dst in the
  // ModRM reg field; "store" selects 0x29 with src in the reg field, so a
  // high (REX-needing) dst can still be encoded with the 2-byte VEX prefix.
  bool load = dst.NeedsRex();
  bool store = !load;

  // When both registers need a REX bit, the 2-byte VEX prefix (only one R
  // bit) cannot encode them; fall back to the 3-byte form.
  if (src.NeedsRex()&& dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  // vmovaps takes no third operand, so vvvv is "no register".
  X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
  if (is_twobyte_form) {
    // The single R bit covers whichever operand sits in the ModRM reg field.
    bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
    byte_one = EmitVexPrefixByteOne(rex_bit,
                                    vvvv_reg,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                    /*X=*/ false,
                                    src.NeedsRex(),
                                    SET_VEX_M_0F);
    // byte_two is only produced (and later emitted) for the 3-byte form.
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  // Instruction Opcode
  if (is_twobyte_form && store) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands
  if (is_twobyte_form && store) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
477 
// movaps xmm, m128: VEX encoding when AVX/AVX2 is available, else SSE
// 0x0F 0x28.
void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
489 
490 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_NONE);
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
501 
// movups xmm, m128 (unaligned load): VEX form when AVX/AVX2 is available,
// else SSE 0x0F 0x10.
void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
513 
514 /** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
/** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_NONE);
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
525 
526 
// movaps m128, xmm (aligned store): VEX form when AVX/AVX2 is available,
// else SSE 0x0F 0x29.
void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
538 
539 /** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
/** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_NONE);
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
550 
// movups m128, xmm (unaligned store): VEX form when AVX/AVX2 is available,
// else SSE 0x0F 0x11.
void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
562 
563 /** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
/** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_NONE);
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
574 
575 
// movss xmm, m32 (scalar single load): 0xF3 0x0F 0x10.
void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
584 
585 
// movss m32, xmm (scalar single store): 0xF3 0x0F 0x11.
void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
594 
595 
// movss xmm, xmm: 0xF3 0x0F 0x11 with operands swapped into MR order.
void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
604 
605 
// movsxd r64, r32 (sign-extend dword to qword): REX.W + 0x63.
void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
612 
613 
// movsxd r64, m32 (sign-extend dword load): REX.W + 0x63.
void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}
620 
621 
// movq xmm, r64 (GPR -> XMM, 64-bit): delegates to the shared helper with
// opcode 0x6E.
void X86_64Assembler::movq(XmmRegister dst, CpuRegister src) {
  EmitMovCpuFpu(dst, src, /*is64bit=*/ true, /*opcode=*/ 0x6E);
}
625 
626 
// movq r64, xmm (XMM -> GPR, 64-bit): delegates to the shared helper with
// opcode 0x7E.
void X86_64Assembler::movq(CpuRegister dst, XmmRegister src) {
  EmitMovCpuFpu(src, dst, /*is64bit=*/ true, /*opcode=*/ 0x7E);
}
630 
631 
// movd xmm, r32 (GPR -> XMM, 32-bit): shared helper, opcode 0x6E.
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  EmitMovCpuFpu(dst, src, /*is64bit=*/ false, /*opcode=*/ 0x6E);
}
635 
636 
// movd r32, xmm (XMM -> GPR, 32-bit): shared helper, opcode 0x7E.
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  EmitMovCpuFpu(src, dst, /*is64bit=*/ false, /*opcode=*/ 0x7E);
}
640 
641 
// addss xmm, xmm: 0xF3 0x0F 0x58.
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
650 
// addss xmm, m32: 0xF3 0x0F 0x58.
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
659 
660 
// subss xmm, xmm: 0xF3 0x0F 0x5C.
void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
669 
670 
// subss xmm, m32: 0xF3 0x0F 0x5C.
void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
679 
680 
// mulss xmm, xmm: 0xF3 0x0F 0x59.
void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
689 
690 
// mulss xmm, m32: 0xF3 0x0F 0x59.
void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
699 
700 
// divss xmm, xmm: 0xF3 0x0F 0x5E.
void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
709 
710 
// divss xmm, m32: 0xF3 0x0F 0x5E.
void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
719 
720 
// addps xmm, xmm (packed single add): 0x0F 0x58, no mandatory prefix.
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
728 
729 
// subps xmm, xmm (packed single subtract): 0x0F 0x5C.
void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
737 
// vaddps xmm, xmm, xmm: shared VEX arithmetic helper, opcode 0x58.
// Addition is commutative, so the helper may swap operands for encoding.
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0x58, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}
742 
// vsubps xmm, xmm, xmm: shared VEX arithmetic helper, opcode 0x5C
// (non-commutative, so operand order is preserved).
void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5C, SET_VEX_PP_NONE);
}
746 
747 
// mulps xmm, xmm (packed single multiply): 0x0F 0x59.
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
755 
// vmulps xmm, xmm, xmm: shared VEX arithmetic helper, opcode 0x59
// (commutative).
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x59, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}
760 
// divps xmm, xmm (packed single divide): 0x0F 0x5E.
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
768 
// vdivps xmm, xmm, xmm: shared VEX arithmetic helper, opcode 0x5E
// (non-commutative).
void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5E, SET_VEX_PP_NONE);
}
772 
// vfmadd213ss acc, left, right: acc = left * acc + right (scalar single).
// Encoding: 3-byte VEX, 0F38 map, PP=66, W=0, opcode 0xA9, with `left` in
// the VEX.vvvv field and `right` in ModRM r/m.
void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
791 
// vfmadd213sd acc, left, right: scalar double fused multiply-add.
// Same encoding as vfmadd213ss (0F38 map, PP=66, opcode 0xA9) except
// VEX.W=1 selects the double-precision form.
void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
// fld m32 (x87 load single): 0xD9 /0.
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);
}
815 
816 
// fst m32 (x87 store single, no pop): 0xD9 /2.
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);
}
822 
823 
// fstp m32 (x87 store single and pop): 0xD9 /3.
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);
}
829 
830 
// movapd xmm, xmm: VEX form when AVX/AVX2 is available, else SSE2
// 0x66 0x0F 0x28.
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
843 
844 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Both registers needing a REX bit cannot be expressed by the 2-byte VEX
  // prefix (only one R bit); fall back to the 3-byte form.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // "load" selects the 0x28 opcode form (dst in ModRM reg); otherwise the
  // 0x29 store form is used so src occupies the reg field instead.
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    // vmovapd takes no third operand, so vvvv is "no register".
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    // ByteTwo is only produced (and later emitted) for the 3-byte form.
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
891 
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  // MOVAPD xmm, m128 (66 0F 28 /r): aligned load of packed doubles.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
904 
/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix (helper also encodes REX bits of base/index).
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
916 
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  // MOVUPD xmm, m128 (66 0F 10 /r): unaligned load of packed doubles.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
929 
/** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
941 
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  // MOVAPD m128, xmm (66 0F 29 /r): aligned store of packed doubles.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);  // Store form of MOVAPD.
  EmitOperand(src.LowBits(), dst);
}
954 
/** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
966 
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  // MOVUPD m128, xmm (66 0F 11 /r): unaligned store of packed doubles.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);  // Store form of MOVUPD.
  EmitOperand(src.LowBits(), dst);
}
979 
/** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
991 
992 
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  // MOVSD xmm, m64 (F2 0F 10 /r): load a scalar double.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix for scalar-double (SD) opcodes.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1001 
1002 
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  // MOVSD m64, xmm (F2 0F 11 /r): store a scalar double.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);  // Store form of MOVSD.
  EmitOperand(src.LowBits(), dst);
}
1011 
1012 
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  // MOVSD xmm, xmm (F2 0F 11 /r): copy the low double between registers.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);  // src goes in the reg field.
}
1021 
1022 
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  // ADDSD xmm, xmm (F2 0F 58 /r): dst.low_double += src.low_double.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1031 
1032 
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  // ADDSD xmm, m64 (F2 0F 58 /r): memory-operand form.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1041 
1042 
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  // SUBSD xmm, xmm (F2 0F 5C /r): dst.low_double -= src.low_double.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1051 
1052 
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  // SUBSD xmm, m64 (F2 0F 5C /r): memory-operand form.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1061 
1062 
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  // MULSD xmm, xmm (F2 0F 59 /r): dst.low_double *= src.low_double.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1071 
1072 
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  // MULSD xmm, m64 (F2 0F 59 /r): memory-operand form.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1081 
1082 
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  // DIVSD xmm, xmm (F2 0F 5E /r): dst.low_double /= src.low_double.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1091 
1092 
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  // DIVSD xmm, m64 (F2 0F 5E /r): memory-operand form.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1101 
1102 
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  // ADDPD xmm, xmm (66 0F 58 /r): packed double add, both lanes.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1111 
1112 
void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VADDPD xmm1, xmm2, xmm3: three-operand packed double add (commutative,
  // so the helper may swap sources to pick a shorter VEX encoding).
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0x58, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1117 
1118 
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  // SUBPD xmm, xmm (66 0F 5C /r): packed double subtract.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1127 
1128 
void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VSUBPD xmm1, xmm2, xmm3: non-commutative, so operand order is preserved.
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5C, SET_VEX_PP_66);
}
1132 
1133 
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  // MULPD xmm, xmm (66 0F 59 /r): packed double multiply.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1142 
void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VMULPD xmm1, xmm2, xmm3: commutative packed double multiply.
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x59, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1147 
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  // DIVPD xmm, xmm (66 0F 5E /r): packed double divide.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1156 
1157 
void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VDIVPD xmm1, xmm2, xmm3: non-commutative packed double divide.
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5E, SET_VEX_PP_66);
}
1161 
1162 
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  // MOVDQA xmm, xmm (66 0F 6F /r): copy 128 bits of integer data.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1175 
1176 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
vmovdqa(XmmRegister dst,XmmRegister src)1177 void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
1178   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1179   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1180   uint8_t ByteZero, ByteOne, ByteTwo;
1181   bool is_twobyte_form = true;
1182 
1183   // Instruction VEX Prefix
1184   if (src.NeedsRex() && dst.NeedsRex()) {
1185     is_twobyte_form = false;
1186   }
1187   bool load = dst.NeedsRex();
1188   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1189   if (is_twobyte_form) {
1190     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1191     bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
1192     ByteOne = EmitVexPrefixByteOne(rex_bit,
1193                                    vvvv_reg,
1194                                    SET_VEX_L_128,
1195                                    SET_VEX_PP_66);
1196   } else {
1197     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1198                                    /*X=*/ false,
1199                                    src.NeedsRex(),
1200                                    SET_VEX_M_0F);
1201     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1202                                    SET_VEX_L_128,
1203                                    SET_VEX_PP_66);
1204   }
1205   EmitUint8(ByteZero);
1206   EmitUint8(ByteOne);
1207   if (!is_twobyte_form) {
1208     EmitUint8(ByteTwo);
1209   }
1210   // Instruction Opcode
1211   if (is_twobyte_form && !load) {
1212     EmitUint8(0x7F);
1213   } else {
1214     EmitUint8(0x6F);
1215   }
1216   // Instruction Operands
1217   if (is_twobyte_form && !load) {
1218     EmitXmmRegisterOperand(src.LowBits(), dst);
1219   } else {
1220     EmitXmmRegisterOperand(dst.LowBits(), src);
1221   }
1222 }
1223 
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  // MOVDQA xmm, m128 (66 0F 6F /r): aligned 128-bit integer load.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1236 
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1248 
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  // MOVDQU xmm, m128 (F3 0F 6F /r): unaligned 128-bit integer load.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1261 
/** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
Load Unaligned */
void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_F3);
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1274 
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  // MOVDQA m128, xmm (66 0F 7F /r): aligned 128-bit integer store.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Store form of MOVDQA.
  EmitOperand(src.LowBits(), dst);
}
1287 
/** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1299 
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  // MOVDQU m128, xmm (F3 0F 7F /r): unaligned 128-bit integer store.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1312 
/** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_F3);
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1324 
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  // PADDB xmm, xmm (66 0F FC /r): packed 8-bit integer add (wrapping).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1333 
1334 
void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VPADDB xmm1, xmm2, xmm3: commutative packed byte add.
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0xFC, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1339 
1340 
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  // PSUBB xmm, xmm (66 0F F8 /r): packed 8-bit integer subtract (wrapping).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1349 
1350 
void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VPSUBB xmm1, xmm2, xmm3: non-commutative packed byte subtract.
  EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xF8, SET_VEX_PP_66);
}
1354 
1355 
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  // PADDW xmm, xmm (66 0F FD /r): packed 16-bit integer add (wrapping).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1364 
void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VPADDW xmm1, xmm2, xmm3: commutative packed word add.
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0xFD, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1369 
1370 
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  // PSUBW xmm, xmm (66 0F F9 /r): packed 16-bit integer subtract (wrapping).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1379 
void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VPSUBW xmm1, xmm2, xmm3: non-commutative packed word subtract.
  EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xF9, SET_VEX_PP_66);
}
1383 
1384 
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  // PMULLW xmm, xmm (66 0F D5 /r): packed 16-bit multiply, low halves kept.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1393 
void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VPMULLW xmm1, xmm2, xmm3: commutative packed word multiply (low result).
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xD5, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1398 
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  // PADDD xmm, xmm (66 0F FE /r): packed 32-bit integer add (wrapping).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1407 
void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VPADDD xmm1, xmm2, xmm3: commutative packed dword add.
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0xFE, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1412 
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  // PSUBD xmm, xmm (66 0F FA /r): packed 32-bit integer subtract (wrapping).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1421 
1422 
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  // PMULLD xmm, xmm (66 0F 38 40 /r): packed 32-bit multiply, low result.
  // Three-byte opcode from the 0F 38 map (SSE4.1 encoding space).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x40);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1432 
void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // VPMULLD xmm1, xmm2, xmm3 (VEX.128.66.0F38.WIG 40 /r). The 0F 38 opcode
  // map forces the three-byte VEX prefix form.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0x40);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1451 
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  // PADDQ xmm, xmm (66 0F D4 /r): packed 64-bit integer add (wrapping).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1460 
1461 
void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VPADDQ xmm1, xmm2, xmm3: commutative packed qword add.
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0xD4, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1466 
1467 
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  // PSUBQ xmm, xmm (66 0F FB /r): packed 64-bit integer subtract (wrapping).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1476 
void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VPSUBQ xmm1, xmm2, xmm3: non-commutative packed qword subtract.
  EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xFB, SET_VEX_PP_66);
}
1480 
1481 
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  // PADDUSB xmm, xmm (66 0F DC /r): packed byte add, unsigned saturation.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1490 
1491 
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  // PADDSB xmm, xmm (66 0F EC /r): packed byte add, signed saturation.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1500 
1501 
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  // PADDUSW xmm, xmm (66 0F DD /r): packed word add, unsigned saturation.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1510 
1511 
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  // PADDSW xmm, xmm (66 0F ED /r): packed word add, signed saturation.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1520 
1521 
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  // PSUBUSB xmm, xmm (66 0F D8 /r): packed byte subtract, unsigned saturation.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1530 
1531 
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  // PSUBSB xmm, xmm (66 0F E8 /r): packed byte subtract, signed saturation.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1540 
1541 
void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // VPSUBD xmm1, xmm2, xmm3: non-commutative packed dword subtract.
  EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xFA, SET_VEX_PP_66);
}
1545 
1546 
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  // PSUBUSW xmm, xmm (66 0F D9 /r): packed word subtract, unsigned saturation.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1555 
1556 
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  // PSUBSW xmm, xmm (66 0F E9 /r): packed word subtract, signed saturation.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1565 
1566 
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  // Convenience overload: 32-bit source operand (no REX.W).
  cvtsi2ss(dst, src, false);
}
1570 
1571 
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  // CVTSI2SS xmm, r32/r64 (F3 [REX.W] 0F 2A /r): signed int -> float.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
1585 
1586 
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  // CVTSI2SS xmm, m32/m64 (F3 [REX.W] 0F 2A /r): memory-operand form.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
1600 
1601 
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  // Convenience overload: 32-bit source operand (no REX.W).
  cvtsi2sd(dst, src, false);
}
1605 
1606 
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  // CVTSI2SD xmm, r32/r64 (F2 [REX.W] 0F 2A /r): signed int -> double.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
1620 
1621 
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  // CVTSI2SD xmm, m32/m64 (F2 [REX.W] 0F 2A /r): memory-operand form.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
1635 
1636 
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  // CVTSS2SI r32, xmm (F3 0F 2D /r): float -> signed int, rounded
  // (non-truncating form; see cvttss2si for truncation).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1645 
1646 
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  // CVTSS2SD xmm, xmm (F3 0F 5A /r): scalar float -> double.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1655 
1656 
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  // CVTSS2SD xmm, m32 (F3 0F 5A /r): memory-operand form.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
1665 
1666 
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  // CVTSD2SI r32, xmm (F2 0F 2D /r): double -> signed int, rounded
  // (non-truncating form; see cvttsd2si for truncation).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1675 
1676 
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  // Convenience overload: 32-bit destination (no REX.W).
  cvttss2si(dst, src, false);
}
1680 
1681 
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  // CVTTSS2SI r32/r64, xmm (F3 [REX.W] 0F 2C /r): float -> int, truncating.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1695 
1696 
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  // Convenience overload: 32-bit destination (no REX.W).
  cvttsd2si(dst, src, false);
}
1700 
1701 
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  // CVTTSD2SI r32/r64, xmm (F2 [REX.W] 0F 2C /r): double -> int, truncating.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1715 
1716 
cvtsd2ss(XmmRegister dst,XmmRegister src)1717 void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
1718   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1719   EmitUint8(0xF2);
1720   EmitOptionalRex32(dst, src);
1721   EmitUint8(0x0F);
1722   EmitUint8(0x5A);
1723   EmitXmmRegisterOperand(dst.LowBits(), src);
1724 }
1725 
1726 
cvtsd2ss(XmmRegister dst,const Address & src)1727 void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
1728   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1729   EmitUint8(0xF2);
1730   EmitOptionalRex32(dst, src);
1731   EmitUint8(0x0F);
1732   EmitUint8(0x5A);
1733   EmitOperand(dst.LowBits(), src);
1734 }
1735 
1736 
cvtdq2ps(XmmRegister dst,XmmRegister src)1737 void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
1738   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1739   EmitOptionalRex32(dst, src);
1740   EmitUint8(0x0F);
1741   EmitUint8(0x5B);
1742   EmitXmmRegisterOperand(dst.LowBits(), src);
1743 }
1744 
1745 
cvtdq2pd(XmmRegister dst,XmmRegister src)1746 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
1747   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1748   EmitUint8(0xF3);
1749   EmitOptionalRex32(dst, src);
1750   EmitUint8(0x0F);
1751   EmitUint8(0xE6);
1752   EmitXmmRegisterOperand(dst.LowBits(), src);
1753 }
1754 
1755 
// comiss xmm, xmm: 0F 2F /r — ordered scalar-single compare, sets EFLAGS.
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// comiss xmm, m32: memory-source form of the above.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}


// comisd xmm, xmm: 66 0F 2F /r — double-precision variant (66 prefix).
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// comisd xmm, m64: memory-source form.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}


// ucomiss xmm, xmm: 0F 2E /r — unordered (QNaN-quiet) scalar-single compare.
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// ucomiss xmm, m32: memory-source form.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}


// ucomisd xmm, xmm: 66 0F 2E /r — double-precision unordered compare.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// ucomisd xmm, m64: memory-source form.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
1830 
1831 
// roundsd xmm, xmm, imm8: 66 0F 3A 0B /r ib (SSE4.1) — imm8 selects rounding mode.
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);  // REX must precede the 0F 3A escape bytes.
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


// roundss xmm, xmm, imm8: 66 0F 3A 0A /r ib (SSE4.1) — single-precision variant.
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
1854 
1855 
// sqrtsd xmm, xmm: F2 0F 51 /r — scalar double square root.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// sqrtss xmm, xmm: F3 0F 51 /r — scalar single square root.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1874 
1875 
// xorpd xmm, m128: 66 0F 57 /r — bitwise XOR of packed doubles, memory source.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}


// xorpd xmm, xmm: register-source form.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// xorps xmm, m128: 0F 57 /r (no prefix) — single-precision flavor.
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}


// xorps xmm, xmm: register-source form.
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pxor xmm, xmm: 66 0F EF /r — integer bitwise XOR of the full register.
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
// Three-operand AVX form; XOR is commutative, which the helper can exploit.
void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xEF, SET_VEX_PP_66, /*is_commutative=*/ true);
}

/* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x57, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}

/* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x57, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1939 
// andpd xmm, m128: 66 0F 54 /r — bitwise AND of packed doubles, memory source.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitOperand(dst.LowBits(), src);
}

// andpd xmm, xmm: register-source form.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// andps xmm, xmm: 0F 54 /r (no prefix) — single-precision flavor.
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pand xmm, xmm: 66 0F DB /r — integer bitwise AND.
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
// Three-operand AVX forms of the AND family; AND is commutative.
void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xDB, SET_VEX_PP_66, /*is_commutative=*/ true);
}

/* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x54, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}

/* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x54, SET_VEX_PP_66, /*is_commutative=*/ true);
}
1992 
// andn r64, r64, r64 (BMI1): VEX.LZ.0F38.W1 F2 /r — dst = ~src1 & src2.
// The three-byte VEX prefix is built by hand here (unlike the xmm ops, which
// go through EmitVecArithAndLogicalOperation).
void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte 0: 0xC4 escape (three-byte form is required for the 0F 38 map).
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  // Byte 1: inverted R/X/B extension bits + map select 0F 38.
  uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                          /*X=*/ false,
                                          src2.NeedsRex(),
                                          SET_VEX_M_0F_38);
  // Byte 2: W=1 (64-bit operands), vvvv encodes src1, L=0, pp=none.
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  // Opcode field
  EmitUint8(0xF2);
  EmitRegisterOperand(dst.LowBits(), src2.LowBits());
}
2011 
// andnpd xmm, xmm: 66 0F 55 /r — dst = ~dst & src (packed double AND-NOT).
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// andnps xmm, xmm: 0F 55 /r (no prefix) — single-precision flavor.
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pandn xmm, xmm: 66 0F DF /r — integer AND-NOT.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
// AND-NOT is not commutative, hence no is_commutative flag here.
void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0xDF, SET_VEX_PP_66);
}

/* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x55, SET_VEX_PP_NONE);
}

/* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x55, SET_VEX_PP_66);
}
2052 
// orpd xmm, xmm: 66 0F 56 /r — bitwise OR of packed doubles.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// orps xmm, xmm: 0F 56 /r (no prefix) — single-precision flavor.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// por xmm, xmm: 66 0F EB /r — integer bitwise OR.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
// Three-operand AVX forms; OR is commutative.
void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xEB, SET_VEX_PP_66, /*is_commutative=*/ true);
}

/* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x56, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}

/* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x56, SET_VEX_PP_66, /*is_commutative=*/ true);
}
2096 
// pavgb xmm, xmm: 66 0F E0 /r — average of packed unsigned bytes.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pavgw xmm, xmm: 66 0F E3 /r — average of packed unsigned words.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// psadbw xmm, xmm: 66 0F F6 /r — sum of absolute byte differences.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaddwd xmm, xmm: 66 0F F5 /r — multiply packed words, add adjacent pairs.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// VEX.128.66.0F.WIG F5 /r VPMADDWD xmm1, xmm2, xmm3/m128 — AVX form.
void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xF5, SET_VEX_PP_66, /*is_commutative=*/ true);
}
2137 
// phaddw xmm, xmm: 66 0F 38 01 /r (SSSE3) — horizontal add of packed words.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);  // REX must precede the 0F 38 escape bytes.
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// phaddd xmm, xmm: 66 0F 38 02 /r (SSSE3) — horizontal add of packed dwords.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// haddps xmm, xmm: F2 0F 7C /r (SSE3) — horizontal add of packed singles.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// haddpd xmm, xmm: 66 0F 7C /r (SSE3) — horizontal add of packed doubles.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// phsubw xmm, xmm: 66 0F 38 05 /r (SSSE3) — horizontal subtract, words.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// phsubd xmm, xmm: 66 0F 38 06 /r (SSSE3) — horizontal subtract, dwords.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// hsubps xmm, xmm: F2 0F 7D /r (SSE3) — horizontal subtract, singles.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// hsubpd xmm, xmm: 66 0F 7D /r (SSE3) — horizontal subtract, doubles.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2213 
// Packed integer min/max. Signed byte/dword and all unsigned word/dword forms
// are SSE4.1 (0F 38 map); signed word and unsigned byte forms are SSE2 (0F map).

// pminsb xmm, xmm: 66 0F 38 38 /r — signed byte minimum.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxsb xmm, xmm: 66 0F 38 3C /r — signed byte maximum.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminsw xmm, xmm: 66 0F EA /r — signed word minimum.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxsw xmm, xmm: 66 0F EE /r — signed word maximum.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminsd xmm, xmm: 66 0F 38 39 /r — signed dword minimum.
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxsd xmm, xmm: 66 0F 38 3D /r — signed dword maximum.
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminub xmm, xmm: 66 0F DA /r — unsigned byte minimum.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxub xmm, xmm: 66 0F DE /r — unsigned byte maximum.
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminuw xmm, xmm: 66 0F 38 3A /r — unsigned word minimum.
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxuw xmm, xmm: 66 0F 38 3E /r — unsigned word maximum.
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminud xmm, xmm: 66 0F 38 3B /r — unsigned dword minimum.
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxud xmm, xmm: 66 0F 38 3F /r — unsigned dword maximum.
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2329 
// minps xmm, xmm: 0F 5D /r — packed single-precision minimum.
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// maxps xmm, xmm: 0F 5F /r — packed single-precision maximum.
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// minpd xmm, xmm: 66 0F 5D /r — packed double-precision minimum.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// maxpd xmm, xmm: 66 0F 5F /r — packed double-precision maximum.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2363 
// Packed integer compares. Quadword forms use the 0F 38 map
// (pcmpeqq: SSE4.1, pcmpgtq: SSE4.2); the rest are SSE2 (0F map).

// pcmpeqb xmm, xmm: 66 0F 74 /r — byte equality, lanes become all-ones/zero.
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpeqw xmm, xmm: 66 0F 75 /r — word equality.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpeqd xmm, xmm: 66 0F 76 /r — dword equality.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpeqq xmm, xmm: 66 0F 38 29 /r — qword equality.
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpgtb xmm, xmm: 66 0F 64 /r — signed byte greater-than.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpgtw xmm, xmm: 66 0F 65 /r — signed word greater-than.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpgtd xmm, xmm: 66 0F 66 /r — signed dword greater-than.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpgtq xmm, xmm: 66 0F 38 37 /r — signed qword greater-than.
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2437 
// shufpd xmm, xmm, imm8: 66 0F C6 /r ib — shuffle packed doubles by imm8 selector.
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Immediate follows the ModRM byte.
}


// shufps xmm, xmm, imm8: 0F C6 /r ib — single-precision variant (no prefix).
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


// pshufd xmm, xmm, imm8: 66 0F 70 /r ib — shuffle packed dwords by imm8 selector.
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
2468 
2469 
// Packed interleave (unpack) family: 66 0F 60..6D. "l" forms interleave the low
// halves of dst/src, "h" forms the high halves; suffix gives the element width.

// punpcklbw xmm, xmm: 66 0F 60 /r — interleave low bytes.
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpcklwd xmm, xmm: 66 0F 61 /r — interleave low words.
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckldq xmm, xmm: 66 0F 62 /r — interleave low dwords.
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpcklqdq xmm, xmm: 66 0F 6C /r — interleave low qwords.
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckhbw xmm, xmm: 66 0F 68 /r — interleave high bytes.
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckhwd xmm, xmm: 66 0F 69 /r — interleave high words.
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckhdq xmm, xmm: 66 0F 6A /r — interleave high dwords.
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckhqdq xmm, xmm: 66 0F 6D /r — interleave high qwords.
void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2548 
2549 
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  // psllw xmm, imm8: 66 0F 71 /6 ib — shift packed words left.
  // Only the REX.B bit can be needed here: the "register" field of ModRM
  // carries the /6 opcode extension, not a register.
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  // pslld xmm, imm8: 66 0F 72 /6 ib — shift packed doublewords left.
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  // psllq xmm, imm8: 66 0F 73 /6 ib — shift packed quadwords left.
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  // psraw xmm, imm8: 66 0F 71 /4 ib — arithmetic right shift of packed words.
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  // psrad xmm, imm8: 66 0F 72 /4 ib — arithmetic right shift of packed dwords.
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  // psrlw xmm, imm8: 66 0F 71 /2 ib — logical right shift of packed words.
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  // psrld xmm, imm8: 66 0F 72 /2 ib — logical right shift of packed dwords.
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  // psrlq xmm, imm8: 66 0F 73 /2 ib — logical right shift of packed qwords.
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  // psrldq xmm, imm8: 66 0F 73 /3 ib — byte-wise right shift of the whole
  // 128-bit register (note /3, unlike the element-wise shifts above).
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);
  EmitUint8(shift_count.value());
}
2656 
2657 
void X86_64Assembler::fldl(const Address& src) {
  // fld m64fp: DD /0 — push a double from memory onto the x87 stack.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}


void X86_64Assembler::fstl(const Address& dst) {
  // fst m64fp: DD /2 — store ST(0) as a double without popping.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}


void X86_64Assembler::fstpl(const Address& dst) {
  // fstp m64fp: DD /3 — store ST(0) as a double and pop.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}


void X86_64Assembler::fstsw() {
  // fstsw %ax: 9B DF E0 — wait, then store the x87 status word in AX.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}


void X86_64Assembler::fnstcw(const Address& dst) {
  // fnstcw m16: D9 /7 — store the x87 control word (no wait).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}


void X86_64Assembler::fldcw(const Address& src) {
  // fldcw m16: D9 /5 — load the x87 control word.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}


void X86_64Assembler::fistpl(const Address& dst) {
  // fistp m64int: DF /7 — store ST(0) as a 64-bit integer and pop.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}


void X86_64Assembler::fistps(const Address& dst) {
  // fistp m32int: DB /3 — store ST(0) as a 32-bit integer and pop.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}


void X86_64Assembler::fildl(const Address& src) {
  // fild m64int: DF /5 — push a 64-bit integer onto the x87 stack.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}


void X86_64Assembler::filds(const Address& src) {
  // fild m32int: DB /0 — push a 32-bit integer onto the x87 stack.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}


void X86_64Assembler::fincstp() {
  // fincstp: D9 F7 — increment the x87 stack-top pointer.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}


void X86_64Assembler::ffree(const Immediate& index) {
  // ffree ST(i): DD C0+i — mark the given x87 register as empty.
  // NOTE(review): the bound excludes index 7, yet ST7 is a valid x87
  // register (ST0..ST7) and the encoding below would accept it — confirm
  // whether CHECK_LT(index.value(), 8) was intended.
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 + index.value());
}


void X86_64Assembler::fsin() {
  // fsin: D9 FE — replace ST(0) with its sine.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}


void X86_64Assembler::fcos() {
  // fcos: D9 FF — replace ST(0) with its cosine.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}


void X86_64Assembler::fptan() {
  // fptan: D9 F2 — partial tangent of ST(0).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}

void X86_64Assembler::fucompp() {
  // fucompp: DA E9 — unordered compare ST(0) with ST(1), pop both.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}


void X86_64Assembler::fprem() {
  // fprem: D9 F8 — partial remainder of ST(0) / ST(1).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}
2776 
2777 
// Try to emit the one-byte short form "xchg rAX, r" (opcode 90+rd).
// Returns false (emitting nothing) when neither operand is RAX, so the
// caller must fall back to the general 0x87 encoding. `prefix_fn` emits
// the appropriate size/REX prefix (EmitOptionalRex32 or EmitRex64) for the
// non-RAX operand; it is skipped when both operands are RAX, in which case
// the emitted byte is 0x90 — a pure NOP that needs no prefix.
bool X86_64Assembler::try_xchg_rax(CpuRegister dst,
                                   CpuRegister src,
                                   void (X86_64Assembler::*prefix_fn)(CpuRegister)) {
  Register src_reg = src.AsRegister();
  Register dst_reg = dst.AsRegister();
  if (src_reg != RAX && dst_reg != RAX) {
    return false;
  }
  // Normalize so dst_reg holds the (possibly) non-RAX operand.
  if (dst_reg == RAX) {
    std::swap(src_reg, dst_reg);
  }
  if (dst_reg != RAX) {
    // Prefix is needed only if one of the registers is not RAX, otherwise it's a pure NOP.
    (this->*prefix_fn)(CpuRegister(dst_reg));
  }
  EmitUint8(0x90 + CpuRegister(dst_reg).LowBits());
  return true;
}
2796 
2797 
void X86_64Assembler::xchgb(CpuRegister dst, CpuRegister src) {
  // xchg r8, r8: 86 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is no short version for AL.
  EmitOptionalByteRegNormalizingRex32(dst, src, /*normalize_both=*/ true);
  EmitUint8(0x86);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::xchgb(CpuRegister reg, const Address& address) {
  // xchg r8, m8: 86 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x86);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xchgw(CpuRegister dst, CpuRegister src) {
  // xchg r16, r16: 66 87 /r, or 66 90+rd when one operand is AX.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // The 0x66 operand-size prefix applies to both encodings and must precede
  // any REX prefix, so it is emitted before trying the short form.
  EmitOperandSizeOverride();
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for AX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::xchgw(CpuRegister reg, const Address& address) {
  // xchg r16, m16: 66 87 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  // xchg r32, r32: 87 /r, or 90+rd when one operand is EAX.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for EAX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  // xchg r32, m32: 87 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  // xchg r64, r64: REX.W 87 /r, or REX.W 90+rd when one operand is RAX.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitRex64)) {
    // A short version for RAX.
    return;
  }
  // General case.
  EmitRex64(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::xchgq(CpuRegister reg, const Address& address) {
  // xchg r64, m64: REX.W 87 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
2878 
2879 
void X86_64Assembler::xaddb(CpuRegister dst, CpuRegister src) {
  // xadd r/m8, r8: 0F C0 /r — src goes in the reg field, hence the swapped
  // argument order relative to the register-register emitters above.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst, /*normalize_both=*/ true);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::xaddb(const Address& address, CpuRegister reg) {
  // xadd m8, r8: 0F C0 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xaddw(CpuRegister dst, CpuRegister src) {
  // xadd r/m16, r16: 66 0F C1 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::xaddw(const Address& address, CpuRegister reg) {
  // xadd m16, r16: 66 0F C1 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xaddl(CpuRegister dst, CpuRegister src) {
  // xadd r/m32, r32: 0F C1 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::xaddl(const Address& address, CpuRegister reg) {
  // xadd m32, r32: 0F C1 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xaddq(CpuRegister dst, CpuRegister src) {
  // xadd r/m64, r64: REX.W 0F C1 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::xaddq(const Address& address, CpuRegister reg) {
  // xadd m64, r64: REX.W 0F C1 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
2952 
2953 
void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  // cmp m8, imm8: 80 /7 ib.
  // NOTE(review): only the low byte of `imm` is emitted, but the CHECK
  // accepts any int32 — values outside int8/uint8 range are silently
  // truncated; confirm whether a stricter check was intended.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);
  EmitUint8(imm.value() & 0xFF);
}


void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  // cmp m16, imm: 66 prefix + /7 immediate form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}


void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  // cmp r32, imm: /7 immediate form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}


void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  // cmp r32, r/m32: 3B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  // cmp r32, m32: 3B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  // cmp m32, r32: 39 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  // cmp m32, imm: /7 immediate form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}


void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  // cmp r64, r/m64: REX.W 3B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  // cmp r64, imm32 (sign-extended): REX.W + /7 form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}


void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  // cmp r64, m64: REX.W 3B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  // cmp m64, imm32 (sign-extended): REX.W + /7 form chosen by EmitComplex.
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}
3043 
3044 
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  // add r32, r/m32: 03 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  // add r32, m32: 03 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}
3059 
3060 
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  // test r/m32, r32: 85 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  // test m32, r32: 85 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  // test r32, imm — picks the shortest encoding:
  //   A8 ib           (AL, imm8)     when reg is RAX and imm fits in uint8,
  //   F6 /0 ib        (r8, imm8)     for RCX/RDX/RBX with a uint8 imm,
  //   A9 id           (EAX, imm32)   when reg is RAX,
  //   F7 /0 id        (r/m32, imm32) otherwise.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
  // we only test the byte CpuRegister to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);
    } else {
      EmitUint8(0xF6);
      // reg < 4 here, so the ModRM byte needs no REX prefix.
      EmitUint8(0xC0 + reg.AsRegister());
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);
    EmitImmediate(immediate);
  } else {
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);
    EmitOperand(0, Operand(reg));
    EmitImmediate(immediate);
  }
}


void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  // test r/m64, r64: REX.W 85 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
  // test m64, r64: REX.W 85 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}
3117 
3118 
testb(const Address & dst,const Immediate & imm)3119 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
3120   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3121   EmitOptionalRex32(dst);
3122   EmitUint8(0xF6);
3123   EmitOperand(Register::RAX, dst);
3124   CHECK(imm.is_int8());
3125   EmitUint8(imm.value() & 0xFF);
3126 }
3127 
3128 
void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
  // test m32, imm32: F7 /0 id.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
3136 
3137 
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  // and r32, r/m32: 23 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  // and r32, m32: 23 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  // and r32, imm: /4 immediate form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andl only supports 32b immediate.
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);
}


void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  // and r64, imm32 (sign-extended): REX.W + /4 form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}


void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  // and r64, r/m64: REX.W 23 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  // and r64, m64: REX.W 23 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
  // and m16, imm: 66 prefix + /4 form chosen by EmitComplex. The immediate
  // may be given as either signed or unsigned 16-bit.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(4, address, imm, /* is_16_op= */ true);
}
3193 
3194 
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  // or r32, r/m32: 0B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  // or r32, m32: 0B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  // or r32, imm: /1 immediate form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);
}


void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  // or r64, imm32 (sign-extended): REX.W + /1 form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}


void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  // or r64, r/m64: REX.W 0B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  // or r64, m64: REX.W 0B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}
3240 
3241 
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  // xor r32, r/m32: 33 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  // xor r32, m32: 33 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  // xor r32, imm: /6 immediate form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);
}


void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  // xor r64, r/m64: REX.W 33 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  // xor r64, imm32 (sign-extended): REX.W + /6 form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}

void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  // xor r64, m64: REX.W 33 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}
3286 
3287 
// Disabled (never compiled) sketch of generic REX-prefix helpers; the live
// code uses the EmitOptionalRex*/EmitRex64 family instead. Kept for
// reference only. NOTE(review): rex_reg_mem below writes `mem->rex()` on a
// const reference parameter — this would not compile if ever enabled.
#if 0
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = mem->rex();
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif
3340 
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  // add r32, imm: /0 immediate form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}


void X86_64Assembler::addw(CpuRegister reg, const Immediate& imm) {
  // add r16, imm: 66 prefix + /0 form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm, /* is_16_op= */ true);
}


void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  // add r64, imm32 (sign-extended): REX.W + /0 form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}


void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  // add r64, m64: REX.W 03 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}


void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  // add m32, r32: 01 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  // add m32, imm: /0 immediate form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}


void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  // add m16, imm: 66 prefix + /0 form chosen by EmitComplex.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}


void X86_64Assembler::addw(const Address& address, CpuRegister reg) {
  // add m16, r16: 66 01 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}
3413 
3414 
// subl dst, src: 32-bit register-register subtract.
void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);  // SUB r32, r/m32 (destination in reg).
  EmitOperand(dst.LowBits(), Operand(src));
}
3421 
3422 
// subl reg, imm: 32-bit subtract of an immediate from a register.
void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);  // /5 selects SUB in the 81/83 group.
}
3428 
3429 
// subq reg, imm: 64-bit subtract of a (sign-extended 32-bit) immediate.
void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);  // /5 selects SUB in the 81/83 group.
}
3436 
3437 
// subq dst, src: 64-bit register-register subtract.
void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);  // SUB r64, r/m64 (destination in reg).
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3444 
3445 
subq(CpuRegister reg,const Address & address)3446 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
3447   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3448   EmitRex64(reg, address);
3449   EmitUint8(0x2B);
3450   EmitOperand(reg.LowBits() & 7, address);
3451 }
3452 
3453 
// subl reg, [address]: 32-bit subtract of a memory operand from a register.
void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);  // SUB r32, r/m32.
  EmitOperand(reg.LowBits(), address);
}
3460 
3461 
// cdq: sign-extend EAX into EDX:EAX (used before idivl).
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


// cqo: sign-extend RAX into RDX:RAX (used before idivq).
void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();  // REX.W turns CDQ (0x99) into CQO.
  EmitUint8(0x99);
}


// idivl reg: signed 32-bit divide of EDX:EAX by reg; quotient->EAX, rem->EDX.
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);  // Unary group-3 opcode.
  EmitUint8(0xF8 | reg.LowBits());  // ModRM mod=11, /7 = IDIV.
}


// idivq reg: signed 64-bit divide of RDX:RAX by reg.
void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());  // ModRM mod=11, /7 = IDIV.
}


// divl reg: unsigned 32-bit divide of EDX:EAX by reg.
void X86_64Assembler::divl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());  // ModRM mod=11, /6 = DIV.
}


// divq reg: unsigned 64-bit divide of RDX:RAX by reg.
void X86_64Assembler::divq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());  // ModRM mod=11, /6 = DIV.
}
3505 
3506 
// imull dst, src: 32-bit two-operand signed multiply (dst *= src).
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);  // IMUL r32, r/m32.
  EmitOperand(dst.LowBits(), Operand(src));
}
3514 
// imull dst, src, imm: 32-bit three-operand signed multiply (dst = src * imm).
// Picks the short 6B /r ib form when the immediate fits in a signed byte.
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);  // IMUL r32, r/m32, imm8.
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);  // IMUL r32, r/m32, imm32.
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}
3535 
3536 
// imull reg, imm: two-operand immediate form, i.e. reg = reg * imm.
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}
3540 
3541 
// imull reg, [address]: 32-bit signed multiply by a memory operand.
void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);  // IMUL r32, r/m32.
  EmitOperand(reg.LowBits(), address);
}
3549 
3550 
// imulq dst, src: 64-bit two-operand signed multiply (dst *= src).
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);  // IMUL r64, r/m64.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3558 
3559 
// imulq reg, imm: two-operand immediate form, i.e. reg = reg * imm.
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}
3563 
// imulq dst, reg, imm: 64-bit three-operand signed multiply (dst = reg * imm).
// Picks the short 6B /r ib form when the immediate fits in a signed byte.
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);  // IMUL r64, r/m64, imm8.
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);  // IMUL r64, r/m64, imm32 (sign-extended).
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}
3584 
// imulq reg, [address]: 64-bit signed multiply by a memory operand.
void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);  // IMUL r64, r/m64.
  EmitOperand(reg.LowBits(), address);
}
3592 
3593 
// imull reg: one-operand signed multiply, EDX:EAX = EAX * reg.
void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);  // Unary group-3 opcode.
  EmitOperand(5, Operand(reg));  // /5 = IMUL.
}


// imulq reg: one-operand signed multiply, RDX:RAX = RAX * reg.
void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));  // /5 = IMUL.
}


// imull [address]: one-operand signed multiply by a 32-bit memory operand.
void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);  // /5 = IMUL.
}
3616 
3617 
// mull reg: one-operand unsigned multiply, EDX:EAX = EAX * reg.
void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);  // Unary group-3 opcode.
  EmitOperand(4, Operand(reg));  // /4 = MUL.
}


// mull [address]: one-operand unsigned multiply by a 32-bit memory operand.
void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);  // /4 = MUL.
}
3632 
3633 
// Shift/rotate instructions. All delegate to EmitGenericShift; the integer
// argument is the opcode-extension (/digit) of the C1/D3 shift group:
// /0 ROL, /1 ROR, /4 SHL, /5 SHR, /7 SAR. The bool selects 64-bit (REX.W)
// vs 32-bit operand size. Register-count forms implicitly shift by CL.
void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
3732 
3733 
// negl reg: 32-bit two's-complement negation.
void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);  // Unary group-3 opcode.
  EmitOperand(3, Operand(reg));  // /3 = NEG.
}


// negq reg: 64-bit two's-complement negation.
void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));  // /3 = NEG.
}
3748 
3749 
notl(CpuRegister reg)3750 void X86_64Assembler::notl(CpuRegister reg) {
3751   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3752   EmitOptionalRex32(reg);
3753   EmitUint8(0xF7);
3754   EmitUint8(0xD0 | reg.LowBits());
3755 }
3756 
3757 
// notq reg: 64-bit bitwise complement.
void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);  // Unary group-3 opcode.
  EmitOperand(2, Operand(reg));  // /2 = NOT.
}
3764 
3765 
// enter imm: create a stack frame (ENTER imm16, 0).
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);  // ENTER imm16, imm8.
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);  // imm16, little-endian.
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);  // imm8 nesting level: always 0.
}


// leave: tear down the current stack frame (mov rsp, rbp; pop rbp).
void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


// ret: near return.
void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


// ret imm: near return, popping imm16 extra bytes of arguments.
void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);  // RET imm16.
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);  // imm16, little-endian.
  EmitUint8((imm.value() >> 8) & 0xFF);
}



// nop: one-byte no-op.
void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


// int3: software breakpoint.
void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


// hlt: halt the processor (privileged; faults in user mode).
void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
3814 
3815 
// jcc label: conditional branch. Bound labels get the shortest encoding that
// reaches (rel8 if possible, else rel32); unbound labels always use the
// long form so the 32-bit displacement slot can hold the link chain.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // 70+cc rel8.
    static const int kLongSize = 6;   // 0F 80+cc rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always backward branches.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      EmitUint8((offset - kShortSize) & 0xFF);  // Displacement from next insn.
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);  // Displacement patched in Bind().
  }
}
3837 
3838 
// jcc near-label: conditional branch that is guaranteed to fit in rel8.
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // 70+cc rel8.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always backward branches.
    CHECK(IsInt<8>(offset - kShortSize));  // Caller promised a near target.
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);  // One-byte slot patched in Bind().
  }
}
3853 
3854 
// jrcxz near-label: jump if RCX is zero; only a rel8 encoding exists,
// hence the NearLabel requirement.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // E3 rel8.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always backward branches.
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);  // One-byte slot patched in Bind().
  }
}
3869 
3870 
// jmp reg: indirect jump through a register.
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);  // Group-5 opcode.
  EmitRegisterOperand(4, reg.LowBits());  // /4 = JMP r/m64.
}

// jmp [address]: indirect jump through memory.
void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);  // /4 = JMP r/m64.
}
3884 
// jmp label: unconditional branch. Bound labels get rel8 when it reaches,
// else rel32; unbound labels always use rel32 to hold the link chain.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // EB rel8.
    static const int kLongSize = 5;   // E9 rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always backward branches.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);  // Displacement patched in Bind().
  }
}
3904 
3905 
// jmp near-label: unconditional branch guaranteed to fit in rel8.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // EB rel8.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always backward branches.
    CHECK(IsInt<8>(offset - kShortSize));  // Caller promised a near target.
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);  // One-byte slot patched in Bind().
  }
}
3920 
3921 
// rep movsw: copy RCX words from [RSI] to [RDI].
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override: word-sized MOVS.
  EmitUint8(0xF3);  // REP prefix.
  EmitUint8(0xA5);
}

// rep movsb: copy RCX bytes from [RSI] to [RDI].
void X86_64Assembler::rep_movsb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // REP prefix.
  EmitUint8(0xA4);
}

// rep movsl: copy RCX dwords from [RSI] to [RDI].
void X86_64Assembler::rep_movsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // REP prefix.
  EmitUint8(0xA5);
}

// lock: emit the LOCK prefix; returns |this| so it can precede the locked
// instruction fluently, e.g. assembler->lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}
3946 
3947 
// cmpxchgb [address], reg: byte compare-and-exchange with AL.
void X86_64Assembler::cmpxchgb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte registers SPL/BPL/SIL/DIL need a REX prefix to avoid AH/BH/CH/DH.
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB0);  // CMPXCHG r/m8, r8.
  EmitOperand(reg.LowBits(), address);
}


// cmpxchgw [address], reg: word compare-and-exchange with AX.
void X86_64Assembler::cmpxchgw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();  // 0x66 prefix: 16-bit operation.
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);  // CMPXCHG r/m16, r16.
  EmitOperand(reg.LowBits(), address);
}


// cmpxchgl [address], reg: dword compare-and-exchange with EAX.
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);  // CMPXCHG r/m32, r32.
  EmitOperand(reg.LowBits(), address);
}


// cmpxchgq [address], reg: qword compare-and-exchange with RAX.
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);  // CMPXCHG r/m64, r64.
  EmitOperand(reg.LowBits(), address);
}
3983 
3984 
// mfence: full memory barrier.
void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);  // 0F AE /6 with mod=11: MFENCE.
}


// gs: emit the GS segment-override prefix; returns |this| so it can precede
// the prefixed instruction fluently (used for thread-local accesses).
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
3999 
4000 
AddImmediate(CpuRegister reg,const Immediate & imm)4001 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
4002   int value = imm.value();
4003   if (value != 0) {
4004     if (value > 0) {
4005       addl(reg, imm);
4006     } else {
4007       subl(reg, Immediate(value));
4008     }
4009   }
4010 }
4011 
4012 
// setcc dst: set the low byte of |dst| to 1 if |condition| holds, else 0.
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);  // 0F 90+cc: SETcc r/m8.
  EmitUint8(0xC0 + dst.LowBits());  // ModRM mod=11; reg field unused.
}
4023 
// blsi dst, src: BMI1 extract-lowest-set-bit (dst = src & -src).
// VEX-encoded group 17 (0F 38 F3), /3; W=1 makes this a 64-bit operation.
// The destination is carried in the VEX vvvv field, the source in ModRM.rm.
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(3, src.LowBits());  // /3 = BLSI.
}

// blsmsk dst, src: BMI1 mask-up-to-lowest-set-bit (dst = src ^ (src - 1)).
void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(2, src.LowBits());  // /2 = BLSMSK.
}

// blsr dst, src: BMI1 reset-lowest-set-bit (dst = src & (src - 1)).
void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(1, src.LowBits());  // /1 = BLSR.
}
4077 
// bswapl dst: reverse the byte order of the low 32 bits of |dst|.
void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());  // 0F C8+rd: BSWAP r32.
}

// bswapq dst: reverse the byte order of all 64 bits of |dst|.
void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());  // REX.W set.
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());  // 0F C8+rd: BSWAP r64.
}
4091 
// bsfl dst, src: bit-scan-forward, 32-bit (index of lowest set bit; ZF set
// and dst undefined when src is zero).
void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);  // BSF r32, r/m32.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// bsfl dst, [src]: 32-bit bit-scan-forward of a memory operand.
void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// bsfq dst, src: bit-scan-forward, 64-bit.
void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);  // BSF r64, r/m64.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// bsfq dst, [src]: 64-bit bit-scan-forward of a memory operand.
void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// bsrl dst, src: bit-scan-reverse, 32-bit (index of highest set bit).
void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);  // BSR r32, r/m32.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// bsrl dst, [src]: 32-bit bit-scan-reverse of a memory operand.
void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

// bsrq dst, src: bit-scan-reverse, 64-bit.
void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);  // BSR r64, r/m64.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// bsrq dst, [src]: 64-bit bit-scan-reverse of a memory operand.
void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
4155 
// popcntl dst, src: 32-bit population count (F3 0F B8; requires POPCNT).
void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix; must precede any REX byte.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// popcntl dst, [src]: 32-bit population count of a memory operand.
void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

// popcntq dst, src: 64-bit population count.
void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix; must precede the REX.W byte.
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// popcntq dst, [src]: 64-bit population count of a memory operand.
void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
4191 
// rdtsc: read the time-stamp counter into EDX:EAX.
void X86_64Assembler::rdtsc() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x31);
}

// repne scasb: scan [RDI] for AL while RCX != 0 and bytes differ.
void X86_64Assembler::repne_scasb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // REPNE prefix.
  EmitUint8(0xAE);
}

// repne scasw: scan [RDI] for AX while RCX != 0 and words differ.
void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override: word-sized SCAS.
  EmitUint8(0xF2);  // REPNE prefix.
  EmitUint8(0xAF);
}

// repe cmpsw: compare words at [RSI]/[RDI] while RCX != 0 and words match.
void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override: word-sized CMPS.
  EmitUint8(0xF3);  // REPE prefix.
  EmitUint8(0xA7);
}


// repe cmpsl: compare dwords at [RSI]/[RDI] while RCX != 0 and dwords match.
void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // REPE prefix.
  EmitUint8(0xA7);
}


// repe cmpsq: compare qwords at [RSI]/[RDI] while RCX != 0 and qwords match.
void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // REPE prefix.
  EmitRex64();  // REX.W: qword-sized CMPS.
  EmitUint8(0xA7);
}

// ud2: guaranteed-undefined instruction; raises #UD (used to trap).
void X86_64Assembler::ud2() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x0B);
}
4238 
// Load the double |value| into |dst| by materializing its bit pattern on the
// stack and reloading it through movsd.
void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
  // TODO: Need to have a code constants table.
  int64_t constant = bit_cast<int64_t, double>(value);
  pushq(Immediate(High32Bits(constant)));
  pushq(Immediate(Low32Bits(constant)));
  // NOTE(review): in 64-bit mode each pushq sign-extends its imm32 to a full
  // 8-byte slot, so the qword at [RSP] is signext(Low32), not High32:Low32 —
  // confirm the movsd below reads the intended bit pattern (or that this
  // helper is only used with constants whose high word equals that
  // sign-extension).
  movsd(dst, Address(CpuRegister(RSP), 0));
  // Pop both 8-byte push slots.
  addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
}
4247 
4248 
// Pad with NOPs until (offset + current buffer position) is a multiple of
// |alignment| (which must be a power of two).
void X86_64Assembler::Align(int alignment, int offset) {
  CHECK(IsPowerOfTwo(alignment));
  // Emit nop instruction until the real position is aligned.
  while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
    nop();
  }
}
4256 
4257 
// Bind |label| to the current buffer position and patch every previously
// emitted 32-bit displacement slot that linked to it. The slots form a
// singly linked list: each holds the position of the next link.
void X86_64Assembler::Bind(Label* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    int next = buffer_.Load<int32_t>(position);  // Next link in the chain.
    // Displacement is relative to the end of the 4-byte slot.
    buffer_.Store<int32_t>(position, bound - (position + 4));
    label->position_ = next;
  }
  label->BindTo(bound);
}
4269 
4270 
// Bind |label| and patch every linked one-byte displacement slot. Near-label
// links are chained by storing the byte distance to the previous link in
// each slot (a delta of 0 marks the end of the chain).
void X86_64Assembler::Bind(NearLabel* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);  // Distance to previous link.
    int offset = bound - (position + 1);  // Relative to end of the rel8 slot.
    CHECK(IsInt<8>(offset));  // NearLabel targets must stay within rel8 range.
    buffer_.Store<int8_t>(position, offset);
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
4284 
4285 
// Emit a pre-encoded operand (ModRM [+ SIB] [+ displacement]), merging
// |reg_or_opcode| — a register number or opcode-extension digit — into the
// reg field (bits 3..5) of the ModRM byte.
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);  // Must fit in the 3-bit reg field.
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // reg field must still be empty.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);  // e.g. relocation for a displacement.
  }
}
4303 
4304 
// Emit an immediate: 16 bits little-endian when |is_16_op|, else 32 bits
// when the value fits, else the full 64 bits.
void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
  if (is_16_op) {
    EmitUint8(imm.value() & 0xFF);  // Low byte first (little-endian).
    EmitUint8(imm.value() >> 8);
  } else if (imm.is_int32()) {
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitInt64(imm.value());
  }
}
4315 
4316 
// Emits an arithmetic/logic instruction with an immediate operand, where
// `reg_or_opcode` selects the operation via ModRM.reg, choosing the shortest
// available encoding.
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);  // ModRM.reg is only 3 bits wide.
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    // General form: opcode 0x81 with full-size immediate.
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
4338 
4339 
EmitLabel(Label * label,int instruction_size)4340 void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
4341   if (label->IsBound()) {
4342     int offset = label->Position() - buffer_.Size();
4343     CHECK_LE(offset, 0);
4344     EmitInt32(offset - instruction_size);
4345   } else {
4346     EmitLabelLink(label);
4347   }
4348 }
4349 
4350 
EmitLabelLink(Label * label)4351 void X86_64Assembler::EmitLabelLink(Label* label) {
4352   CHECK(!label->IsBound());
4353   int position = buffer_.Size();
4354   EmitInt32(label->position_);
4355   label->LinkTo(position);
4356 }
4357 
4358 
// Links an unbound near label. Only one byte is available at the branch site,
// so the chain stores deltas between consecutive link sites rather than
// absolute positions; Bind(NearLabel*) walks these deltas backwards.
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));  // Chain links must stay within uint8 reach.
    EmitUint8(delta & 0xFF);
  } else {
    // First link in the chain: a zero byte terminates the walk in Bind().
    EmitUint8(0);
  }
  label->LinkTo(position);
}
4372 
4373 
EmitGenericShift(bool wide,int reg_or_opcode,CpuRegister reg,const Immediate & imm)4374 void X86_64Assembler::EmitGenericShift(bool wide,
4375                                        int reg_or_opcode,
4376                                        CpuRegister reg,
4377                                        const Immediate& imm) {
4378   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4379   CHECK(imm.is_int8());
4380   if (wide) {
4381     EmitRex64(reg);
4382   } else {
4383     EmitOptionalRex32(reg);
4384   }
4385   if (imm.value() == 1) {
4386     EmitUint8(0xD1);
4387     EmitOperand(reg_or_opcode, Operand(reg));
4388   } else {
4389     EmitUint8(0xC1);
4390     EmitOperand(reg_or_opcode, Operand(reg));
4391     EmitUint8(imm.value() & 0xFF);
4392   }
4393 }
4394 
4395 
EmitGenericShift(bool wide,int reg_or_opcode,CpuRegister operand,CpuRegister shifter)4396 void X86_64Assembler::EmitGenericShift(bool wide,
4397                                        int reg_or_opcode,
4398                                        CpuRegister operand,
4399                                        CpuRegister shifter) {
4400   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4401   CHECK_EQ(shifter.AsRegister(), RCX);
4402   if (wide) {
4403     EmitRex64(operand);
4404   } else {
4405     EmitOptionalRex32(operand);
4406   }
4407   EmitUint8(0xD3);
4408   EmitOperand(reg_or_opcode, Operand(operand));
4409 }
4410 
// Emits a 66 [REX] 0F <opcode> instruction moving data between a general
// purpose register and an XMM register (e.g. MOVD/MOVQ-style opcodes); REX.W
// is set for 64-bit moves.
void X86_64Assembler::EmitMovCpuFpu(
    XmmRegister fp_reg, CpuRegister cpu_reg, bool is64bit, uint8_t opcode) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory operand-size prefix for this encoding.
  EmitOptionalRex(false, is64bit, fp_reg.NeedsRex(), false, cpu_reg.NeedsRex());
  EmitUint8(0x0F);  // Two-byte opcode escape.
  EmitUint8(opcode);
  EmitOperand(fp_reg.LowBits(), Operand(cpu_reg));
}
4420 
EmitOptionalRex(bool force,bool w,bool r,bool x,bool b)4421 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
4422   // REX.WRXB
4423   // W - 64-bit operand
4424   // R - MODRM.reg
4425   // X - SIB.index
4426   // B - MODRM.rm/SIB.base
4427   uint8_t rex = force ? 0x40 : 0;
4428   if (w) {
4429     rex |= 0x48;  // REX.W000
4430   }
4431   if (r) {
4432     rex |= 0x44;  // REX.0R00
4433   }
4434   if (x) {
4435     rex |= 0x42;  // REX.00X0
4436   }
4437   if (b) {
4438     rex |= 0x41;  // REX.000B
4439   }
4440   if (rex != 0) {
4441     EmitUint8(rex);
4442   }
4443 }
4444 
// Emits a REX prefix only if `reg` needs the REX.B extension bit (R8-R15).
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}
4448 
// Emits a REX prefix if needed: REX.R from `dst`, REX.B from `src`.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
4452 
// Emits a REX prefix if needed: REX.R from `dst`, REX.B from `src` (XMM8-15).
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
4456 
// Emits a REX prefix if needed: REX.R from the CPU `dst`, REX.B from the
// XMM `src`.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
4460 
// Emits a REX prefix if needed: REX.R from the XMM `dst`, REX.B from the
// CPU `src`.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
4464 
EmitOptionalRex32(const Operand & operand)4465 void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
4466   uint8_t rex = operand.rex();
4467   if (rex != 0) {
4468     EmitUint8(rex);
4469   }
4470 }
4471 
EmitOptionalRex32(CpuRegister dst,const Operand & operand)4472 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
4473   uint8_t rex = operand.rex();
4474   if (dst.NeedsRex()) {
4475     rex |= 0x44;  // REX.0R00
4476   }
4477   if (rex != 0) {
4478     EmitUint8(rex);
4479   }
4480 }
4481 
EmitOptionalRex32(XmmRegister dst,const Operand & operand)4482 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
4483   uint8_t rex = operand.rex();
4484   if (dst.NeedsRex()) {
4485     rex |= 0x44;  // REX.0R00
4486   }
4487   if (rex != 0) {
4488     EmitUint8(rex);
4489   }
4490 }
4491 
// Emits a bare REX.W prefix (64-bit operand size, no extension bits).
void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}
4495 
// Emits REX.W, adding REX.B if `reg` is R8-R15.
void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}
4499 
EmitRex64(const Operand & operand)4500 void X86_64Assembler::EmitRex64(const Operand& operand) {
4501   uint8_t rex = operand.rex();
4502   rex |= 0x48;  // REX.W000
4503   EmitUint8(rex);
4504 }
4505 
// Emits REX.W with REX.R from `dst` and REX.B from `src` as needed.
void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
4509 
// Emits REX.W with REX.R from the XMM `dst` and REX.B from the CPU `src`.
void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
4513 
// Emits REX.W with REX.R from the CPU `dst` and REX.B from the XMM `src`.
void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
4517 
EmitRex64(CpuRegister dst,const Operand & operand)4518 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
4519   uint8_t rex = 0x48 | operand.rex();  // REX.W000
4520   if (dst.NeedsRex()) {
4521     rex |= 0x44;  // REX.0R00
4522   }
4523   EmitUint8(rex);
4524 }
4525 
EmitRex64(XmmRegister dst,const Operand & operand)4526 void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
4527   uint8_t rex = 0x48 | operand.rex();  // REX.W000
4528   if (dst.NeedsRex()) {
4529     rex |= 0x44;  // REX.0R00
4530   }
4531   EmitUint8(rex);
4532 }
4533 
EmitOptionalByteRegNormalizingRex32(CpuRegister dst,CpuRegister src,bool normalize_both)4534 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
4535                                                           CpuRegister src,
4536                                                           bool normalize_both) {
4537   // SPL, BPL, SIL, DIL need the REX prefix.
4538   bool force = src.AsRegister() > 3;
4539   if (normalize_both) {
4540     // Some instructions take two byte registers, such as `xchg bpl, al`, so they need the REX
4541     // prefix if either `src` or `dst` needs it.
4542     force |= dst.AsRegister() > 3;
4543   } else {
4544     // Other instructions take one byte register and one full register, such as `movzxb rax, bpl`.
4545     // They need REX prefix only if `src` needs it, but not `dst`.
4546   }
4547   EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
4548 }
4549 
EmitOptionalByteRegNormalizingRex32(CpuRegister dst,const Operand & operand)4550 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
4551   uint8_t rex = operand.rex();
4552   // For dst, SPL, BPL, SIL, DIL need the rex prefix.
4553   bool force = dst.AsRegister() > 3;
4554   if (force) {
4555     rex |= 0x40;  // REX.0000
4556   }
4557   if (dst.NeedsRex()) {
4558     rex |= 0x44;  // REX.0R00
4559   }
4560   if (rex != 0) {
4561     EmitUint8(rex);
4562   }
4563 }
4564 
AddConstantArea()4565 void X86_64Assembler::AddConstantArea() {
4566   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
4567   for (size_t i = 0, e = area.size(); i < e; i++) {
4568     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4569     EmitInt32(area[i]);
4570   }
4571 }
4572 
AppendInt32(int32_t v)4573 size_t ConstantArea::AppendInt32(int32_t v) {
4574   size_t result = buffer_.size() * elem_size_;
4575   buffer_.push_back(v);
4576   return result;
4577 }
4578 
AddInt32(int32_t v)4579 size_t ConstantArea::AddInt32(int32_t v) {
4580   // Look for an existing match.
4581   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
4582     if (v == buffer_[i]) {
4583       return i * elem_size_;
4584     }
4585   }
4586 
4587   // Didn't match anything.
4588   return AppendInt32(v);
4589 }
4590 
// Returns the byte offset of the 64-bit constant `v` in the (int32-element)
// constant area, deduplicating against any existing adjacent word pair.
size_t ConstantArea::AddInt64(int64_t v) {
  // Split into little-endian low/high 32-bit words.
  int32_t v_low = v;
  int32_t v_high = v >> 32;
  if (buffer_.size() > 1) {
    // Ensure we don't pass the end of the buffer.
    // Scans every adjacent pair, so a match may straddle two previously added
    // constants; any position with the right bit pattern is reusable.
    for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
      if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
        return i * elem_size_;
      }
    }
  }

  // Didn't match anything.
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v_low);
  buffer_.push_back(v_high);
  return result;
}
4609 
// Adds a double constant, deduplicated through its 64-bit integer bit pattern.
size_t ConstantArea::AddDouble(double v) {
  // Treat the value as a 64-bit integer value.
  return AddInt64(bit_cast<int64_t, double>(v));
}
4614 
// Adds a float constant, deduplicated through its 32-bit integer bit pattern.
size_t ConstantArea::AddFloat(float v) {
  // Treat the value as a 32-bit integer value.
  return AddInt32(bit_cast<int32_t, float>(v));
}
4619 
EmitVexPrefixByteZero(bool is_twobyte_form)4620 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
4621   // Vex Byte 0,
4622   // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
4623   // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
4624   uint8_t vex_prefix = 0xC0;
4625   if (is_twobyte_form) {
4626     vex_prefix |= TWO_BYTE_VEX;  // 2-Byte Vex
4627   } else {
4628     vex_prefix |= THREE_BYTE_VEX;  // 3-Byte Vex
4629   }
4630   return vex_prefix;
4631 }
4632 
EmitVexPrefixForAddress(const Address & addr,bool r,int vex_l,int vex_pp)4633 void X86_64Assembler::EmitVexPrefixForAddress(const Address& addr, bool r, int vex_l, int vex_pp) {
4634   uint8_t rex = addr.rex();
4635   bool rex_x = (rex & GET_REX_X) != 0u;
4636   bool rex_b = (rex & GET_REX_B) != 0u;
4637   bool is_twobyte_form = (!rex_b && !rex_x);
4638   uint8_t byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
4639   uint8_t byte_one, byte_two;
4640   if (is_twobyte_form) {
4641     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
4642     byte_one = EmitVexPrefixByteOne(r, vvvv_reg, vex_l, vex_pp);
4643   } else {
4644     byte_one = EmitVexPrefixByteOne(r, rex_x, rex_b, SET_VEX_M_0F);
4645     byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vex_l, vex_pp);
4646   }
4647   EmitUint8(byte_zero);
4648   EmitUint8(byte_one);
4649   if (!is_twobyte_form) {
4650     EmitUint8(byte_two);
4651   }
4652 }
4653 
EmitVexPrefixByteOne(bool R,bool X,bool B,int SET_VEX_M)4654 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
4655   // Vex Byte 1,
4656   uint8_t vex_prefix = VEX_INIT;
4657   /** Bit[7] This bit needs to be set to '1'
4658   otherwise the instruction is LES or LDS */
4659   if (!R) {
4660     // R .
4661     vex_prefix |= SET_VEX_R;
4662   }
4663   /** Bit[6] This bit needs to be set to '1'
4664   otherwise the instruction is LES or LDS */
4665   if (!X) {
4666     // X .
4667     vex_prefix |= SET_VEX_X;
4668   }
4669   /** Bit[5] This bit needs to be set to '1' */
4670   if (!B) {
4671     // B .
4672     vex_prefix |= SET_VEX_B;
4673   }
4674   /** Bits[4:0], Based on the instruction documentaion */
4675   vex_prefix |= SET_VEX_M;
4676   return vex_prefix;
4677 }
4678 
// Returns VEX byte 1 for the 2-byte form: inverted R bit, one's-complemented
// vvvv register specifier, vector length L, and the pp opcode-prefix bits.
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] This bit needs to be set to '1'
  otherwise the instruction is LES or LDS */
  if (!R) {
    // R is stored inverted: set means "no REX.R extension".
    vex_prefix |= SET_VEX_R;
  }
  /**Bits[6:3] - 'vvvv' the source or dest register specifier */
  if (operand.IsNoRegister()) {
    // No vvvv operand: encode 0b1111 (one's complement of "register 0").
    vex_prefix |= 0x78;
  } else if (operand.IsXmmRegister()) {
    // vvvv holds the one's complement of the register number (15 - n).
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    // Same inverted encoding for general-purpose registers.
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] -  "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
4712 
// Returns VEX byte 2 for the 3-byte form: W bit, one's-complemented vvvv
// register specifier, vector length L, and the pp opcode-prefix bits.
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] This bits needs to be set to '1' with default value.
  When using C4H form of VEX prefix, REX.W value is ignored */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  // Bits[6:3] - 'vvvv' the source or dest register specifier
  if (operand.IsXmmRegister()) {
    // vvvv holds the one's complement of the register number (15 - n).
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    // Same inverted encoding for general-purpose registers.
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] -  "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
4744 
// Returns VEX byte 2 for the 3-byte form when there is no vvvv operand:
// vvvv is encoded as 0b1111 ("no register").
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] This bits needs to be set to '1' with default value.
  When using C4H form of VEX prefix, REX.W value is ignored */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier */
  vex_prefix |= (0x0F << 3);
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;

  // Bits[1:0] -  "pp"
  // NOTE(review): unlike the other EmitVexPrefixByteTwo overload, this one
  // guards the OR with SET_VEX_PP_NONE; presumably that constant is zero, in
  // which case the guard is redundant — confirm against the header.
  if (SET_VEX_PP != SET_VEX_PP_NONE) {
    vex_prefix |= SET_VEX_PP;
  }
  return vex_prefix;
}
4768 
// Emits a VEX-encoded three-operand vector instruction
// `dst = src1 <opcode> src2` with 128-bit vector length.
void X86_64Assembler::EmitVecArithAndLogicalOperation(XmmRegister dst,
                                                      XmmRegister src1,
                                                      XmmRegister src2,
                                                      uint8_t opcode,
                                                      int vex_pp,
                                                      bool is_commutative) {
  // Only src2 (the ModRM.rm operand) forces the longer 3-byte VEX prefix;
  // for commutative ops, swapping the sources can recover the 2-byte form.
  if (is_commutative && src2.NeedsRex() && !src1.NeedsRex()) {
    return EmitVecArithAndLogicalOperation(dst, src2, src1, opcode, vex_pp, is_commutative);
  }
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in VEX.vvvv; src2 goes into ModRM.rm.
  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  bool is_twobyte_form = !src2.NeedsRex();
  uint8_t byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  uint8_t byte_one, byte_two;
  if (is_twobyte_form) {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, vex_pp);
  } else {
    // 3-byte form: byte one carries R/X/B and the 0F opcode map; byte two
    // carries W, vvvv, L and pp.
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, vex_pp);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);  // byte_two is only assigned on the 3-byte path.
  }
  EmitUint8(opcode);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
4798 
4799 }  // namespace x86_64
4800 }  // namespace art
4801