/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "assembler_x86_64.h"

#include "base/casts.h"
#include "base/memory_region.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "thread.h"

namespace art HIDDEN {
namespace x86_64 {

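// Debug printers for the operand types used below. Register printing is
// forwarded to the underlying enum's operator<<; addresses are printed in
// AT&T syntax, i.e. disp(%base,%index,scale), mirroring the ModRM/SIB bytes
// that EmitOperand() encodes.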
std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
  return os << reg.AsRegister();
}

std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
  return os << reg.AsFloatRegister();
}

std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
  return os << "ST" << static_cast<int>(reg);
}

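// ModRM recap for the printer below: mod selects the displacement size
// (0 = none, 1 = disp8, 2 = disp32), rm == RSP means a SIB byte with
// base/index/scale follows, and a SIB base of RBP under mod 0 means
// "disp32 with no base register".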
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  switch (addr.mod()) {
    case 0:
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << "(%" << addr.cpu_rm() << ")";
      } else if (addr.base() == RBP) {
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      return os << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 1:
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 2:
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    default:
      return os << "<address?>";
  }
}

bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
  return has_AVX_ || has_AVX2_;
}


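// A note on the REX helpers used throughout: EmitOptionalRex32 emits a REX
// prefix only when an operand needs one (an extended register R8-R15 /
// XMM8-XMM15, or such a register used as a base/index), while EmitRex64
// always emits a REX with the W bit set to select 64-bit operand size.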
void X86_64Assembler::call(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(2, reg.LowBits());
}


void X86_64Assembler::call(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(2, address);
}


void X86_64Assembler::call(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xE8);
  static const int kSize = 5;
  // Offset by one because we already have emitted the opcode.
  EmitLabel(label, kSize - 1);
}
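
// E8 takes a rel32 measured from the end of the 5-byte instruction; for
// example, a bound label 0x100 bytes past the next instruction encodes as
// E8 00 01 00 00 (little endian).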

void X86_64Assembler::pushq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x50 + reg.LowBits());
}


void X86_64Assembler::pushq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(6, address);
}


void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (imm.is_int8()) {
    EmitUint8(0x6A);
    EmitUint8(imm.value() & 0xFF);
  } else {
    EmitUint8(0x68);
    EmitImmediate(imm);
  }
}
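
// 6A sign-extends an imm8 and 68 an imm32, both to the full 64-bit stack
// slot; there is no push of a 64-bit immediate, hence the is_int32 CHECK.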


void X86_64Assembler::popq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x58 + reg.LowBits());
}


void X86_64Assembler::popq(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(0, address);
}


void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (imm.is_int32()) {
    // 32 bit. Note: sign-extends.
    EmitRex64(dst);
    EmitUint8(0xC7);
    EmitRegisterOperand(0, dst.LowBits());
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitRex64(dst);
    EmitUint8(0xB8 + dst.LowBits());
    EmitInt64(imm.value());
  }
}
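
// REX.W C7 /0 (7 bytes) sign-extends an imm32 and is preferred; the
// REX.W B8+rd "movabs" form (10 bytes) carries a full imm64 and is emitted
// only when the value does not fit.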


void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}


void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}


void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}


void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}

void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}

void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}

void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
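
// 0F C3 is movnti: a non-temporal store that hints the cache hierarchy to
// bypass caching of the written line; with REX.W it stores 64 bits.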

void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
  cmov(c, dst, src, true);
}

void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (is64bit) {
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitOperand(dst.LowBits(), src);
}
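
// The cmovcc opcodes occupy 0F 40..0F 4F, so the condition code is simply
// added into the low nibble of the second opcode byte.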


void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}
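
// EmitOptionalByteRegNormalizingRex32 forces a REX prefix whenever a byte
// operand names SPL/BPL/SIL/DIL: without REX, byte-register encodings 4-7
// select AH/CH/DH/BH instead.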


void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}


void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}


void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC6);
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}
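
// Passing Register::RAX to EmitOperand above is not a register operand:
// RAX encodes as 0, and the immediate store opcodes C6/C7 use the ModRM reg
// field as the /0 opcode extension.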


void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}


void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}


void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_uint16() || imm.is_int16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8(imm.value() >> 8);
}
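
// The 0x66 operand-size override (EmitOperandSizeOverride) narrows the
// 32-bit encodings 89 and C7 /0 to 16 bits; the imm16 is emitted low byte
// first.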


void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


/** VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t byte_zero, byte_one, byte_two;
  bool is_twobyte_form = true;
  bool load = dst.NeedsRex();
  bool store = !load;

  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
  if (is_twobyte_form) {
    bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
    byte_one = EmitVexPrefixByteOne(rex_bit,
                                    vvvv_reg,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                    /*X=*/ false,
                                    src.NeedsRex(),
                                    SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  // Instruction Opcode
  if (is_twobyte_form && store) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands
  if (is_twobyte_form && store) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
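
// The two-byte VEX prefix (C5) only has an R bit for the ModRM reg field, so
// a single extended register is handled by choosing the direction that puts
// it in reg: load form 28 /r when dst needs REX, store form 29 /r when src
// does. Only when both registers are extended is the three-byte prefix (C4),
// which also carries a B bit, required.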

void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_NONE);
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_NONE);
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}

/** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_NONE);
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}

void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}

/** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_NONE);
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
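
// movaps/vmovaps (28/29) require a 16-byte aligned m128 operand and fault
// otherwise; movups/vmovups (10/11) accept any alignment.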


void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}


void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}


void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movq(XmmRegister dst, CpuRegister src) {
  EmitMovCpuFpu(dst, src, /*is64bit=*/ true, /*opcode=*/ 0x6E);
}


void X86_64Assembler::movq(CpuRegister dst, XmmRegister src) {
  EmitMovCpuFpu(src, dst, /*is64bit=*/ true, /*opcode=*/ 0x7E);
}


void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  EmitMovCpuFpu(dst, src, /*is64bit=*/ false, /*opcode=*/ 0x6E);
}


void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  EmitMovCpuFpu(src, dst, /*is64bit=*/ false, /*opcode=*/ 0x7E);
}
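
// EmitMovCpuFpu (not shown in this excerpt) emits the common
// 66 [REX] 0F <opcode> sequence for GPR<->XMM moves: opcode 6E moves into
// the XMM register, 7E out of it, and is64bit selects REX.W (movq vs. movd).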


void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
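
// SSE FP arithmetic uses one opcode per operation (58 add, 5C sub, 59 mul,
// 5E div); the mandatory prefix selects the flavor: F3 = scalar single (ss),
// F2 = scalar double (sd), none = packed single (ps), 66 = packed double (pd).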


void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0x58, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}

void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5C, SET_VEX_PP_NONE);
}
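
// EmitVecArithAndLogicalOperation (not shown in this excerpt) emits the
// VEX-encoded three-operand form of these ops; the is_commutative flag
// presumably lets it swap the two sources when that permits a shorter
// (two-byte) VEX prefix.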


void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x59, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}

void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5E, SET_VEX_PP_NONE);
}

void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}

void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
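
// FMA3 "213" form: acc = left * acc + right (Intel: xmm1 = xmm2 * xmm1 + xmm3),
// opcode A9 in the 0F 38 map; VEX.W picks the width (W=0 ss, W=1 sd).
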
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);
}


void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);
}


void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);
}
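
// x87 single-precision memory ops, selected by the /digit in D9:
// /0 loads onto the FP stack (flds), /2 stores ST(0) (fsts), /3 stores and
// pops (fstps).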


void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}

void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}

/** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}

void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}

/** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}


void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}


void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
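
// Register-to-register movss/movsd use the MR (store) encoding above, but
// either direction behaves the same: only the low lane of the destination is
// replaced and its upper bits are preserved (legacy SSE merge semantics).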


void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0x58, SET_VEX_PP_66, /*is_commutative=*/ true);
}


void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5C, SET_VEX_PP_66);
}


void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x59, SET_VEX_PP_66, /*is_commutative=*/ true);
}

void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5E, SET_VEX_PP_66);
}


void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Instruction VEX Prefix
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  bool load = dst.NeedsRex();
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x7F);
  } else {
    EmitUint8(0x6F);
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}

void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
    Load Unaligned */
void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(src, dst.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_F3);
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}

/** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_66);
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}

void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}

/** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  EmitVexPrefixForAddress(dst, src.NeedsRex(), SET_VEX_L_128, SET_VEX_PP_F3);
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
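
// For the integer vector moves the mandatory prefix distinguishes alignment:
// 66 0F 6F/7F (movdqa) faults on an unaligned m128, F3 0F 6F/7F (movdqu)
// does not.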
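// Packed integer add/sub opcodes by lane width, used by the forms below:
// FC/FD/FE/D4 = paddb/w/d/q and F8/F9/FA/FB = psubb/w/d/q; DC/DD and D8/D9
// are the unsigned-saturating byte/word forms, EC/ED and E8/E9 the
// signed-saturating ones.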
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0xFC, SET_VEX_PP_66, /*is_commutative=*/ true);
}


void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xF8, SET_VEX_PP_66);
}


void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0xFD, SET_VEX_PP_66, /*is_commutative=*/ true);
}


void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xF9, SET_VEX_PP_66);
}


void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xD5, SET_VEX_PP_66, /*is_commutative=*/ true);
}

void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0xFE, SET_VEX_PP_66, /*is_commutative=*/ true);
}

void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x40);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
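
// pmulld (66 0F 38 40) is an SSE4.1 instruction, hence the three-byte opcode
// escape; the older pmullw above is plain SSE2.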

void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                 /*X=*/ false,
                                 src2.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0x40);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(
      dst, add_left, add_right, /*opcode=*/ 0xD4, SET_VEX_PP_66, /*is_commutative=*/ true);
}


void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xFB, SET_VEX_PP_66);
}


void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xFA, SET_VEX_PP_66);
}


void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, false);
}


void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, false);
}


void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
1635
1636
cvtss2si(CpuRegister dst,XmmRegister src)1637 void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
1638 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1639 EmitUint8(0xF3);
1640 EmitOptionalRex32(dst, src);
1641 EmitUint8(0x0F);
1642 EmitUint8(0x2D);
1643 EmitXmmRegisterOperand(dst.LowBits(), src);
1644 }
1645
1646
cvtss2sd(XmmRegister dst,XmmRegister src)1647 void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
1648 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1649 EmitUint8(0xF3);
1650 EmitOptionalRex32(dst, src);
1651 EmitUint8(0x0F);
1652 EmitUint8(0x5A);
1653 EmitXmmRegisterOperand(dst.LowBits(), src);
1654 }
1655
1656
cvtss2sd(XmmRegister dst,const Address & src)1657 void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
1658 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1659 EmitUint8(0xF3);
1660 EmitOptionalRex32(dst, src);
1661 EmitUint8(0x0F);
1662 EmitUint8(0x5A);
1663 EmitOperand(dst.LowBits(), src);
1664 }
1665
1666
cvtsd2si(CpuRegister dst,XmmRegister src)1667 void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
1668 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1669 EmitUint8(0xF2);
1670 EmitOptionalRex32(dst, src);
1671 EmitUint8(0x0F);
1672 EmitUint8(0x2D);
1673 EmitXmmRegisterOperand(dst.LowBits(), src);
1674 }
1675
1676
cvttss2si(CpuRegister dst,XmmRegister src)1677 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
1678 cvttss2si(dst, src, false);
1679 }
1680
1681
cvttss2si(CpuRegister dst,XmmRegister src,bool is64bit)1682 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
1683 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1684 EmitUint8(0xF3);
1685 if (is64bit) {
1686 // Emit a REX.W prefix if the operand size is 64 bits.
1687 EmitRex64(dst, src);
1688 } else {
1689 EmitOptionalRex32(dst, src);
1690 }
1691 EmitUint8(0x0F);
1692 EmitUint8(0x2C);
1693 EmitXmmRegisterOperand(dst.LowBits(), src);
1694 }
1695
1696
cvttsd2si(CpuRegister dst,XmmRegister src)1697 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
1698 cvttsd2si(dst, src, false);
1699 }
1700
1701
cvttsd2si(CpuRegister dst,XmmRegister src,bool is64bit)1702 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
1703 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1704 EmitUint8(0xF2);
1705 if (is64bit) {
1706 // Emit a REX.W prefix if the operand size is 64 bits.
1707 EmitRex64(dst, src);
1708 } else {
1709 EmitOptionalRex32(dst, src);
1710 }
1711 EmitUint8(0x0F);
1712 EmitUint8(0x2C);
1713 EmitXmmRegisterOperand(dst.LowBits(), src);
1714 }
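
// Editorial note: the 0F 2C opcodes (cvttss2si/cvttsd2si) truncate toward
// zero, while the 0F 2D opcodes (cvtss2si/cvtsd2si) round according to
// MXCSR.RC. A sketch (assuming xmm0 holds 1.7):
//
//   assembler.cvttsd2si(CpuRegister(RAX), XmmRegister(XMM0), /*is64bit=*/ true);
//
// would emit F2 48 0F 2C C0 and leave 1 in RAX, whereas the cvtsd2si form
// would produce 2 under the default round-to-nearest mode.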


void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}


void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}


void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}


void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}


void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
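
// Editorial note: the ordered (comiss/comisd, 0F 2F) and unordered
// (ucomiss/ucomisd, 0F 2E) compares set ZF/PF/CF identically and clear
// OF/SF/AF; they differ only in that the ordered forms raise #IA on any NaN
// input while the unordered forms raise it only on SNaN. An unordered result
// sets ZF=PF=CF=1, which is why NaN-aware branches test PF after these.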


void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
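
// Editorial note: for roundss/roundsd (66 0F 3A 0A/0B), bits 1:0 of the
// immediate select the rounding mode (00 nearest, 01 down, 10 up, 11
// truncate) when bit 2 is clear; setting bit 2 defers to MXCSR.RC, and bit 3
// suppresses precision exceptions. So, as a sketch of typical usage (not
// taken from this file), a floor() lowering would pass Immediate(1) and a
// ceil() lowering Immediate(2).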


void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xEF, SET_VEX_PP_66, /*is_commutative=*/ true);
}

/* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x57, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}

/* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x57, SET_VEX_PP_66, /*is_commutative=*/ true);
}
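
// Editorial note: the v* forms use the three-operand VEX encoding, so dst
// need not alias a source. EmitVecArithAndLogicalOperation (defined elsewhere
// in this file) builds the VEX prefix; the is_commutative flag presumably
// lets it swap src1/src2 when that yields a shorter prefix. As a byte-level
// sketch, vpxor(xmm0, xmm1, xmm2) encodes as C5 F1 EF C2
// ("vpxor %xmm2, %xmm1, %xmm0").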

void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xDB, SET_VEX_PP_66, /*is_commutative=*/ true);
}

/* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x54, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}

/* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x54, SET_VEX_PP_66, /*is_commutative=*/ true);
}

void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                          /*X=*/ false,
                                          src2.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  // Opcode byte: ANDN is VEX.0F38 F2 /r.
  EmitUint8(0xF2);
  EmitRegisterOperand(dst.LowBits(), src2.LowBits());
}
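
// Editorial note: andn (BMI1, VEX.0F38 F2 /r) computes dst = ~src1 & src2 on
// general-purpose registers; /*W=*/ true above selects the 64-bit form. As a
// sketch, andn(CpuRegister(RAX), CpuRegister(RBX), CpuRegister(RCX)) should
// encode as C4 E2 E0 F2 C1 ("andnq %rcx, %rbx, %rax"), with byte 0xE0
// carrying W=1 and vvvv = ~RBX.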

void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0xDF, SET_VEX_PP_66);
}

/* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x55, SET_VEX_PP_NONE);
}

/* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x55, SET_VEX_PP_66);
}

void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xEB, SET_VEX_PP_66, /*is_commutative=*/ true);
}

/* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x56, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}

/* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0x56, SET_VEX_PP_66, /*is_commutative=*/ true);
}

void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  EmitVecArithAndLogicalOperation(
      dst, src1, src2, /*opcode=*/ 0xF5, SET_VEX_PP_66, /*is_commutative=*/ true);
}

void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
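
// Editorial note: pcmpeqb/w/d and pcmpgtb/w/d are SSE2 instructions with
// single-byte 0F opcodes, while the quadword compares need newer extensions:
// pcmpeqq (66 0F 38 29) is SSE4.1 and pcmpgtq (66 0F 38 37) is SSE4.2, so
// callers are expected to check the corresponding CPU features first.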

void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);
  EmitUint8(shift_count.value());
}
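
// Editorial note: the packed shift-by-immediate forms share opcodes
// 0F 71/72/73 and are distinguished by the ModRM reg field, passed above as
// the first argument of EmitXmmRegisterOperand: /6 selects psll, /4 psra,
// /2 psrl, and /3 (under 0F 73) the whole-register byte shift psrldq. For
// example, psrlq(xmm1, Immediate(32)) emits 66 0F 73 D1 20.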


void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}


void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}


void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}


void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}
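
// Editorial note: the bytes above are FWAIT (9B) followed by FNSTSW AX
// (DF E0), i.e. the waiting form "fstsw %ax", which stores the x87 status
// word into AX after pending FP exceptions have been serviced.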


void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}


void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}


void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}


void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}


void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}


void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}


void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}


void X86_64Assembler::ffree(const Immediate& index) {
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 + index.value());
}


void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}


void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}


void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}

void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}


void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}


bool X86_64Assembler::try_xchg_rax(CpuRegister dst,
                                   CpuRegister src,
                                   void (X86_64Assembler::*prefix_fn)(CpuRegister)) {
  Register src_reg = src.AsRegister();
  Register dst_reg = dst.AsRegister();
  if (src_reg != RAX && dst_reg != RAX) {
    return false;
  }
  if (dst_reg == RAX) {
    std::swap(src_reg, dst_reg);
  }
  if (dst_reg != RAX) {
    // A prefix is needed only when the non-RAX register requires one; if both
    // registers are RAX, the 0x90 byte below is a pure NOP and gets no prefix.
    (this->*prefix_fn)(CpuRegister(dst_reg));
  }
  EmitUint8(0x90 + CpuRegister(dst_reg).LowBits());
  return true;
}
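
// Editorial sketch (not from the source): the 0x90 + reg short form exists
// only when one operand is the accumulator, so xchgl(CpuRegister(RCX),
// CpuRegister(RAX)) emits the single byte 0x91 ("xchg %eax, %ecx"). The
// prefix_fn hook lets xchgq reuse this helper by emitting REX.W first, e.g.
// xchgq(CpuRegister(RAX), CpuRegister(R8)) -> 49 90.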


void X86_64Assembler::xchgb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is no short version for AL.
  EmitOptionalByteRegNormalizingRex32(dst, src, /*normalize_both=*/ true);
  EmitUint8(0x86);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::xchgb(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x86);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xchgw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for AX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::xchgw(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for EAX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitRex64)) {
    // A short version for RAX.
    return;
  }
  // General case.
  EmitRex64(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::xchgq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xaddb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst, /*normalize_both=*/ true);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::xaddb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xaddw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::xaddw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xaddl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::xaddl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xaddq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::xaddq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
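
// Editorial note: XADD stores the sum at the destination and writes the old
// destination value back into the source register, which is why the ModRM
// reg field comes from src in the register forms above. Prefixed with LOCK
// (emitted separately), the memory forms yield an atomic fetch-and-add.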


void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);
  EmitUint8(imm.value() & 0xFF);
}


void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}


void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}


void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}


void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}
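
// Editorial note: EmitComplex (defined elsewhere in this file) picks the
// compact encodings where it can: the sign-extended imm8 form (0x83 /r) when
// the immediate fits in 8 bits, the accumulator short form (0x05 + opcode*8,
// e.g. 0x3D for cmp) when the operand is RAX, and otherwise the generic
// 0x81 /r with a full imm32. The 7 passed above is the CMP opcode extension.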


void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}


void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers with a byte variant encodable without REX (RAX, RCX, RDX,
  // and RBX), test only the low byte register to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);
    } else {
      EmitUint8(0xF6);
      EmitUint8(0xC0 + reg.AsRegister());
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);
    EmitImmediate(immediate);
  } else {
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);
    EmitOperand(0, Operand(reg));
    EmitImmediate(immediate);
  }
}
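
// Illustrative encodings for the three testl cases above (editorial, not
// from the source): testl(CpuRegister(RBX), Immediate(1)) -> F6 C3 01;
// testl(CpuRegister(RAX), Immediate(0x1000)) -> A9 00 10 00 00; and
// testl(CpuRegister(R9), Immediate(0x1000)) -> 41 F7 C1 00 10 00 00. Note
// the byte form computes SF from bit 7 rather than bit 31, so callers
// presumably rely only on ZF (and the always-cleared CF/OF) here.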
3101
3102
testq(CpuRegister reg1,CpuRegister reg2)3103 void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
3104 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3105 EmitRex64(reg1, reg2);
3106 EmitUint8(0x85);
3107 EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
3108 }
3109
3110
testq(CpuRegister reg,const Address & address)3111 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
3112 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3113 EmitRex64(reg, address);
3114 EmitUint8(0x85);
3115 EmitOperand(reg.LowBits(), address);
3116 }
3117
3118
testb(const Address & dst,const Immediate & imm)3119 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
3120 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3121 EmitOptionalRex32(dst);
3122 EmitUint8(0xF6);
3123 EmitOperand(Register::RAX, dst);
3124 CHECK(imm.is_int8());
3125 EmitUint8(imm.value() & 0xFF);
3126 }
3127
3128
testl(const Address & dst,const Immediate & imm)3129 void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
3130 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3131 EmitOptionalRex32(dst);
3132 EmitUint8(0xF7);
3133 EmitOperand(0, dst);
3134 EmitImmediate(imm);
3135 }
3136
3137
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andl only supports 32b immediate.
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);
}


void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}


void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(4, address, imm, /* is_16_op= */ true);
}


void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);
}


void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}


void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);
}


void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}

void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}


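// The disabled block below is an older REX helper design that rewrote its
// register arguments in place; it is kept only for reference and is superseded
// by the EmitOptionalRex() family further down.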
#if 0
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = mem.rex();  // `mem` is a reference, not a pointer.
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif

void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}


void X86_64Assembler::addw(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm, /* is_16_op= */ true);
}


void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}


void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}


void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}


void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}


void X86_64Assembler::addw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);
}


void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);
}


void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x2B);
  EmitOperand(reg.LowBits() & 7, address);
}


void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);
  EmitOperand(reg.LowBits(), address);
}


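// cdq sign-extends EAX into EDX:EAX; cqo is the REX.W form, sign-extending RAX
// into RDX:RAX. Both are typically emitted before a signed division.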
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();
  EmitUint8(0x99);
}


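// The divisions are group-3 (0xF7) instructions; the operation lives in the
// ModRM byte with mod=11: 0xF8 | reg encodes /7 (idiv), 0xF0 | reg encodes
// /6 (div).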
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}


void X86_64Assembler::divl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}


void X86_64Assembler::divq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}


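// Two-operand imul (0F AF) multiplies into the destination register. The
// three-operand immediate forms use 0x6B with a sign-extended imm8 when the
// constant fits, or 0x69 with a full imm32 otherwise.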
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(dst.LowBits(), Operand(src));
}

void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}


void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}


void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}

void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}

void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));
}


void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);
}


void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(4, Operand(reg));
}


void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);
}


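// Shifts and rotates share the group-2 encodings: 0xC1 /digit imm8, 0xD1
// /digit for a shift count of 1, and 0xD3 /digit for a count in CL (see
// EmitGenericShift below). The digit selects the operation: ROL=0, ROR=1,
// SHL=4, SHR=5, SAR=7.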
void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}


void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());
}


void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));
}


void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);
}


void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
}


void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}


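// Conditional branches use 0x70+cc with a rel8 when the bound target is in
// short range, or 0x0F 0x80+cc with a rel32 otherwise. Unbound labels always
// take the long form so the 32-bit slot can hold the label's fixup chain.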
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 6;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}


void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}


void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}


void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());
}

void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}

void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 5;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}


void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}


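// REP string moves: 0xF3 is the REP prefix, 0xA4/0xA5 are movsb and movs{w,l};
// the 0x66 operand-size override turns 0xA5 into the 16-bit movsw form.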
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}

void X86_64Assembler::rep_movsb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA4);
}

void X86_64Assembler::rep_movsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}

X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}


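// cmpxchg compares RAX/EAX/AX/AL with the memory operand and, on equality,
// stores the register operand there; combine with lock() for an atomic
// compare-and-swap, e.g. assembler.lock()->cmpxchgl(addr, reg).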
void X86_64Assembler::cmpxchgb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB0);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpxchgw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}


X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}


void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
  int value = imm.value();
  if (value != 0) {
    if (value > 0) {
      addl(reg, imm);
    } else {
      // Negate so that adding a negative constant subtracts its magnitude.
      subl(reg, Immediate(-value));
    }
  }
}


void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  EmitUint8(0xC0 + dst.LowBits());
}

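// BMI1 bit-manipulation instructions share the VEX-encoded opcode 0xF3 in map
// 0F38; the ModRM reg field carries the opcode extension (blsi=/3, blsmsk=/2,
// blsr=/1) and the destination register is encoded inverted in VEX.vvvv.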
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(3, src.LowBits());
}

void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(2, src.LowBits());
}

void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(1, src.LowBits());
}

void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}

void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}

void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

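// POPCNT uses the mandatory 0xF3 prefix, which must be emitted before any REX
// byte, hence the ordering below.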
void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::rdtsc() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x31);
}

void X86_64Assembler::repne_scasb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitUint8(0xAE);
}

void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF2);
  EmitUint8(0xAF);
}

void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}

void X86_64Assembler::ud2() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x0B);
}

void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
  // TODO: Need to have a code constants table.
  int64_t constant = bit_cast<int64_t, double>(value);
  // Store the two 32-bit halves explicitly: pushq sign-extends its 32-bit
  // immediate to 64 bits, so two pushes would not lay the halves out
  // contiguously.
  subq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
  movl(Address(CpuRegister(RSP), 0), Immediate(Low32Bits(constant)));
  movl(Address(CpuRegister(RSP), 4), Immediate(High32Bits(constant)));
  movsd(dst, Address(CpuRegister(RSP), 0));
  addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
}


void X86_64Assembler::Align(int alignment, int offset) {
  CHECK(IsPowerOfTwo(alignment));
  // Emit nop instructions until the real position is aligned.
  while (((offset + buffer_.GetPosition()) & (alignment - 1)) != 0) {
    nop();
  }
}


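// Binding resolves a label's fixup chain: while unbound, each patch slot holds
// the position of the previous link, so we walk the chain and overwrite every
// slot with its final PC-relative displacement.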
void X86_64Assembler::Bind(Label* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    int next = buffer_.Load<int32_t>(position);
    buffer_.Store<int32_t>(position, bound - (position + 4));
    label->position_ = next;
  }
  label->BindTo(bound);
}


void X86_64Assembler::Bind(NearLabel* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);
    int offset = bound - (position + 1);
    CHECK(IsInt<8>(offset));
    buffer_.Store<int8_t>(position, offset);
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}


void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);
  }
}


void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
  if (is_16_op) {
    EmitUint8(imm.value() & 0xFF);
    EmitUint8(imm.value() >> 8);
  } else if (imm.is_int32()) {
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitInt64(imm.value());
  }
}


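// Shared immediate-form emitter for the group-1 ALU instructions: 0x83 /digit
// with a sign-extended imm8 when the constant fits, the one-byte accumulator
// short form (0x05 + digit*8) when the operand is RAX, and the general
// 0x81 /digit form with a full immediate otherwise.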
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}


void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}


void X86_64Assembler::EmitLabelLink(Label* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}


void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);
  }
  label->LinkTo(position);
}


void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    EmitUint8(0xD1);
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}


void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}

void X86_64Assembler::EmitMovCpuFpu(
    XmmRegister fp_reg, CpuRegister cpu_reg, bool is64bit, uint8_t opcode) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, fp_reg.NeedsRex(), false, cpu_reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(opcode);
  EmitOperand(fp_reg.LowBits(), Operand(cpu_reg));
}

void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r) {
    rex |= 0x44;  // REX.0R00
  }
  if (x) {
    rex |= 0x42;  // REX.00X0
  }
  if (b) {
    rex |= 0x41;  // REX.000B
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}

void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}

void X86_64Assembler::EmitRex64(const Operand& operand) {
  uint8_t rex = operand.rex();
  rex |= 0x48;  // REX.W000
  EmitUint8(rex);
}

void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}

void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}

void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
                                                          CpuRegister src,
                                                          bool normalize_both) {
  // SPL, BPL, SIL, DIL need the REX prefix.
  bool force = src.AsRegister() > 3;
  if (normalize_both) {
    // Some instructions take two byte registers, such as `xchg bpl, al`, so they need the REX
    // prefix if either `src` or `dst` needs it.
    force |= dst.AsRegister() > 3;
  } else {
    // Other instructions take one byte register and one full register, such as `movzxb rax, bpl`.
    // They need REX prefix only if `src` needs it, but not `dst`.
  }
  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = dst.AsRegister() > 3;
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::AddConstantArea() {
  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
  for (size_t i = 0, e = area.size(); i < e; i++) {
    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    EmitInt32(area[i]);
  }
}

size_t ConstantArea::AppendInt32(int32_t v) {
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v);
  return result;
}

size_t ConstantArea::AddInt32(int32_t v) {
  // Look for an existing match.
  for (size_t i = 0, e = buffer_.size(); i < e; i++) {
    if (v == buffer_[i]) {
      return i * elem_size_;
    }
  }

  // Didn't match anything.
  return AppendInt32(v);
}

size_t ConstantArea::AddInt64(int64_t v) {
  int32_t v_low = v;
  int32_t v_high = v >> 32;
  if (buffer_.size() > 1) {
    // Ensure we don't pass the end of the buffer.
    for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
      if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
        return i * elem_size_;
      }
    }
  }

  // Didn't match anything.
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v_low);
  buffer_.push_back(v_high);
  return result;
}

size_t ConstantArea::AddDouble(double v) {
  // Treat the value as a 64-bit integer value.
  return AddInt64(bit_cast<int64_t, double>(v));
}

size_t ConstantArea::AddFloat(float v) {
  // Treat the value as a 32-bit integer value.
  return AddInt32(bit_cast<int32_t, float>(v));
}

uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
  // Vex Byte 0,
  // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
  // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
  uint8_t vex_prefix = 0xC0;
  if (is_twobyte_form) {
    vex_prefix |= TWO_BYTE_VEX;  // 2-Byte Vex
  } else {
    vex_prefix |= THREE_BYTE_VEX;  // 3-Byte Vex
  }
  return vex_prefix;
}

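// The 2-byte VEX prefix (0xC5) has no X or B bits, so it is only usable when
// the address needs neither; otherwise fall back to the 3-byte (0xC4) form
// with its explicit map-select field.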
void X86_64Assembler::EmitVexPrefixForAddress(const Address& addr, bool r, int vex_l, int vex_pp) {
  uint8_t rex = addr.rex();
  bool rex_x = (rex & GET_REX_X) != 0u;
  bool rex_b = (rex & GET_REX_B) != 0u;
  bool is_twobyte_form = (!rex_b && !rex_x);
  uint8_t byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  uint8_t byte_one, byte_two;
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    byte_one = EmitVexPrefixByteOne(r, vvvv_reg, vex_l, vex_pp);
  } else {
    byte_one = EmitVexPrefixByteOne(r, rex_x, rex_b, SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vex_l, vex_pp);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
}

uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] This bit needs to be set to '1',
  otherwise the instruction is LES or LDS */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /** Bit[6] This bit needs to be set to '1',
  otherwise the instruction is LES or LDS */
  if (!X) {
    // X .
    vex_prefix |= SET_VEX_X;
  }
  /** Bit[5] This bit needs to be set to '1' */
  if (!B) {
    // B .
    vex_prefix |= SET_VEX_B;
  }
  /** Bits[4:0], Based on the instruction documentation */
  vex_prefix |= SET_VEX_M;
  return vex_prefix;
}

uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] This bit needs to be set to '1',
  otherwise the instruction is LES or LDS */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier */
  if (operand.IsNoRegister()) {
    vex_prefix |= 0x78;
  } else if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L": VEX.L = 1 indicates a 256-bit vector operation,
  VEX.L = 0 indicates a 128-bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] - "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}

uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] - "W": set to '1' for a 64-bit operand size; in the C4H form of
  the VEX prefix this field replaces REX.W, which is otherwise ignored. */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  // Bits[6:3] - 'vvvv' the source or dest register specifier
  if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L": VEX.L = 1 indicates a 256-bit vector operation,
  VEX.L = 0 indicates a 128-bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] - "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}

uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] - "W": set to '1' for a 64-bit operand size; in the C4H form of
  the VEX prefix this field replaces REX.W, which is otherwise ignored. */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier */
  vex_prefix |= (0x0F << 3);
  /** Bit[2] - "L": VEX.L = 1 indicates a 256-bit vector operation,
  VEX.L = 0 indicates a 128-bit vector operation */
  vex_prefix |= SET_VEX_L;

  // Bits[1:0] - "pp"
  if (SET_VEX_PP != SET_VEX_PP_NONE) {
    vex_prefix |= SET_VEX_PP;
  }
  return vex_prefix;
}

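// For commutative operations, swap the sources when only src2 needs REX.B so
// the r/m operand stays in the low eight registers and the shorter two-byte
// VEX prefix can be used.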
void X86_64Assembler::EmitVecArithAndLogicalOperation(XmmRegister dst,
                                                      XmmRegister src1,
                                                      XmmRegister src2,
                                                      uint8_t opcode,
                                                      int vex_pp,
                                                      bool is_commutative) {
  if (is_commutative && src2.NeedsRex() && !src1.NeedsRex()) {
    return EmitVecArithAndLogicalOperation(dst, src2, src1, opcode, vex_pp, is_commutative);
  }
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  bool is_twobyte_form = !src2.NeedsRex();
  uint8_t byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  uint8_t byte_one, byte_two;
  if (is_twobyte_form) {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, vex_pp);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, vex_pp);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  EmitUint8(opcode);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

}  // namespace x86_64
}  // namespace art