1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "assembler_x86_64.h"
18
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23
24 namespace art HIDDEN {
25 namespace x86_64 {
26
operator <<(std::ostream & os,const CpuRegister & reg)27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
28 return os << reg.AsRegister();
29 }
30
operator <<(std::ostream & os,const XmmRegister & reg)31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
32 return os << reg.AsFloatRegister();
33 }
34
operator <<(std::ostream & os,const X87Register & reg)35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
36 return os << "ST" << static_cast<int>(reg);
37 }
38
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  // Pretty-print a ModRM/SIB memory operand in AT&T syntax:
  // "disp(%base,%index,scale)".  The branches mirror x86-64 encoding rules:
  // `mod` selects the displacement size, and rm==RSP signals a SIB byte.
  switch (addr.mod()) {
    case 0:
      // mod==0: no displacement (except the RBP special case below).
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        // No SIB byte, or SIB with index==RSP (meaning "no index"): "(%reg)".
        return os << "(%" << addr.cpu_rm() << ")";
      } else if (addr.base() == RBP) {
        // SIB with base==RBP and mod==0 encodes disp32 with no base register.
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      return os << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 1:
      // mod==1: 8-bit displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 2:
      // mod==2: 32-bit displacement.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    default:
      // mod==3 is a register operand, not a memory address.
      return os << "<address?>";
  }
}
66
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68 if (has_AVX_ || has_AVX2_) {
69 return true;
70 }
71 return false;
72 }
73
74
void X86_64Assembler::call(CpuRegister reg) {
  // Indirect near call through a register: FF /2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(2, reg.LowBits());
}
81
82
void X86_64Assembler::call(const Address& address) {
  // Indirect near call through memory: FF /2.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(2, address);
}
89
90
void X86_64Assembler::call(Label* label) {
  // Direct relative near call bound to `label`: E8 rel32, 5 bytes total.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xE8);
  static const int kSize = 5;
  // Offset by one because we already have emitted the opcode.
  EmitLabel(label, kSize - 1);
}
98
void X86_64Assembler::pushq(CpuRegister reg) {
  // Push 64-bit register: 50+rd.  Push defaults to 64-bit operand size,
  // so no REX.W is emitted, only an optional REX prefix.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x50 + reg.LowBits());
}
104
105
void X86_64Assembler::pushq(const Address& address) {
  // Push 64-bit memory operand: FF /6.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(6, address);
}
112
113
void X86_64Assembler::pushq(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // pushq only supports 32b immediate.
  if (imm.is_int8()) {
    // 6A ib: shorter encoding, immediate is sign-extended.
    EmitUint8(0x6A);
    EmitUint8(imm.value() & 0xFF);
  } else {
    // 68 id: 32-bit immediate, sign-extended to 64 bits.
    EmitUint8(0x68);
    EmitImmediate(imm);
  }
}
125
126
void X86_64Assembler::popq(CpuRegister reg) {
  // Pop into 64-bit register: 58+rd (64-bit operand size by default).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0x58 + reg.LowBits());
}
132
133
void X86_64Assembler::popq(const Address& address) {
  // Pop into 64-bit memory operand: 8F /0.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0x8F);
  EmitOperand(0, address);
}
140
141
void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (imm.is_int32()) {
    // 32 bit. Note: sign-extends.  REX.W C7 /0 id is shorter than the
    // 10-byte movabs form below.
    EmitRex64(dst);
    EmitUint8(0xC7);
    EmitRegisterOperand(0, dst.LowBits());
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    // Full 64-bit immediate: REX.W B8+rd io (movabs).
    EmitRex64(dst);
    EmitUint8(0xB8 + dst.LowBits());
    EmitInt64(imm.value());
  }
}
156
157
void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
  // 32-bit move immediate to register: B8+rd id.
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xB8 + dst.LowBits());
  EmitImmediate(imm);
}
165
166
void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
  // 64-bit move immediate to memory: REX.W C7 /0 id (imm sign-extended).
  CHECK(imm.is_int32());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
175
176
void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
  // 64-bit register-to-register move.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
184
185
void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
  // 32-bit register-to-register move: 8B /r (RM encoding, dst in reg field).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
192
193
void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
  // 64-bit load: REX.W 8B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
200
201
void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
  // 32-bit load: 8B /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8B);
  EmitOperand(dst.LowBits(), src);
}
208
209
void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
  // 64-bit store: REX.W 89 /r (src in the ModRM reg field).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
216
217
void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
  // 32-bit store: 89 /r (src in the ModRM reg field).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
224
void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
  // 32-bit store immediate: C7 /0 id.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}
232
void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
  // 32-bit non-temporal store (movnti m32, r32): 0F C3 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
240
void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
  // 64-bit non-temporal store (movnti m64, r64): REX.W 0F C3 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC3);
  EmitOperand(src.LowBits(), dst);
}
248
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
  // Convenience overload: defaults to the 64-bit form.
  cmov(c, dst, src, /*is64bit=*/ true);
}
252
void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
  // Conditional move register-to-register: 0F 40+cc /r; REX.W when is64bit.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
260
261
void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
  // Conditional move from memory: 0F 40+cc /r; REX.W when is64bit.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (is64bit) {
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x40 + c);
  EmitOperand(dst.LowBits(), src);
}
273
274
void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
  // Zero-extend byte register to 32 bits: 0F B6 /r.  The normalizing REX
  // forces access to the low byte (SPL/BPL/SIL/DIL) rather than AH/CH/DH/BH.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
282
283
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
  // Zero-extend byte from memory: 0F B6 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB6);
  EmitOperand(dst.LowBits(), src);
}
293
294
void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
  // Sign-extend byte register to 32 bits: 0F BE /r (normalizing REX as in
  // movzxb to reach the low byte of every register).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
302
303
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
  // Sign-extend byte from memory: 0F BE /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Byte register is only in the source register form, so we don't use
  // EmitOptionalByteRegNormalizingRex32(dst, src);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBE);
  EmitOperand(dst.LowBits(), src);
}
313
314
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  // Deliberately unsupported: byte loads must state the extension explicitly.
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
318
319
void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
  // Byte store: 88 /r, with the normalizing REX so the low byte of any
  // register can be the source.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst);
  EmitUint8(0x88);
  EmitOperand(src.LowBits(), dst);
}
326
327
void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
  // Byte store immediate: C6 /0 ib.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xC6);
  // Register::RAX (== 0) supplies the /0 opcode-extension digit here.
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}
336
337
void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
  // Zero-extend 16-bit register to 32 bits: 0F B7 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
345
346
void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
  // Zero-extend 16-bit value from memory: 0F B7 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB7);
  EmitOperand(dst.LowBits(), src);
}
354
355
void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
  // Sign-extend 16-bit register to 32 bits: 0F BF /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
363
364
void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
  // Sign-extend 16-bit value from memory: 0F BF /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBF);
  EmitOperand(dst.LowBits(), src);
}
372
373
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  // Deliberately unsupported: word loads must state the extension explicitly.
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
377
378
void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
  // 16-bit store: 66 (operand-size override) + 89 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);
  EmitUint8(0x89);
  EmitOperand(src.LowBits(), dst);
}
386
387
void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
  // 16-bit store immediate: 66 + C7 /0 iw.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  // Register::RAX (== 0) supplies the /0 opcode-extension digit here.
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_uint16() || imm.is_int16());
  // Emit the 16-bit immediate little-endian, one byte at a time.
  EmitUint8(imm.value() & 0xFF);
  EmitUint8(imm.value() >> 8);
}
398
399
void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  // 64-bit load effective address: REX.W 8D /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
406
407
void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  // 32-bit load effective address: 8D /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);
  EmitOperand(dst.LowBits(), src);
}
414
415
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  // Aligned packed-single register move.  Prefer the VEX encoding when the
  // CPU supports AVX/AVX2; otherwise emit legacy SSE 0F 28 /r.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
427
428
429 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t byte_zero, byte_one, byte_two;
  // The 2-byte VEX prefix (C5) can only carry one ModRM-extension bit
  // (VEX.R), so if both registers need a REX bit we must use the 3-byte
  // form (C4).
  bool is_twobyte_form = true;
  // With the 2-byte form and an extended dst we switch to the store
  // encoding (0x29) below, putting the extended register in the ModRM reg
  // field where VEX.R can cover it.
  bool load = dst.NeedsRex();
  bool store = !load;

  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  // vmovaps has no second source, so vvvv is unused (NoRegister).
  X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
  if (is_twobyte_form) {
    bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
    byte_one = EmitVexPrefixByteOne(rex_bit,
                                    vvvv_reg,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                    /*X=*/ false,
                                    src.NeedsRex(),
                                    SET_VEX_M_0F);
    // byte_two is only initialized (and only emitted) in the 3-byte form.
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  // Instruction Opcode: 0x28 (load form) or 0x29 (store form, operands
  // swapped) — both encode the same register-to-register move.
  if (is_twobyte_form && store) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands
  if (is_twobyte_form && store) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
477
void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  // Aligned packed-single load.  VEX encoding when AVX/AVX2 is available,
  // else legacy SSE 0F 28 /r.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
489
490 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix.  The 2-byte VEX form (C5) cannot encode the
  // REX.X/REX.B bits needed for extended base/index registers, so fall back
  // to the 3-byte form (C4) whenever the address needs either bit.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused for moves (no second source).
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    // ByteTwo is only initialized (and only emitted) in the 3-byte form.
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
529
void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  // Unaligned packed-single load.  VEX encoding when AVX/AVX2 is available,
  // else legacy SSE 0F 10 /r.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
541
542 /** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix.  Use the 2-byte VEX form (C5) only when the
  // address requires neither REX.X nor REX.B; the 2-byte form cannot encode
  // those bits.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused for moves (no second source).
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    // ByteTwo is only initialized (and only emitted) in the 3-byte form.
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
581
582
void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  // Aligned packed-single store.  VEX encoding when AVX/AVX2 is available,
  // else legacy SSE 0F 29 /r.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
594
595 /** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix.  The 2-byte form (C5) cannot carry the
  // REX.X/REX.B bits an extended base/index register would need.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused for moves (no second source).
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    // ByteTwo is only initialized (and only emitted) in the 3-byte form.
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
635
void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  // Unaligned packed-single store.  VEX encoding when AVX/AVX2 is
  // available, else legacy SSE 0F 11 /r.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
647
648 /** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix.  The 2-byte form (C5) cannot carry the
  // REX.X/REX.B bits an extended base/index register would need.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused for moves (no second source).
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    // ByteTwo is only initialized (and only emitted) in the 3-byte form.
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
688
689
void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  // Scalar single-precision load: F3 0F 10 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
698
699
void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  // Scalar single-precision store: F3 0F 11 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
708
709
void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  // Scalar single-precision register move: F3 0F 11 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
718
719
void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  // Sign-extend 32-bit register into 64-bit register: REX.W 63 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
726
727
void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  // Sign-extend 32-bit memory value into 64-bit register: REX.W 63 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}
734
735
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  // Convenience overload: defaults to the 64-bit (movq) form.
  movd(dst, src, /*is64bit=*/ true);
}
739
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  // Convenience overload: defaults to the 64-bit (movq) form.
  movd(dst, src, /*is64bit=*/ true);
}
743
void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
  // GPR -> XMM move: 66 [REX.W] 0F 6E /r (REX.W selects movq over movd).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x6E);
  EmitOperand(dst.LowBits(), Operand(src));
}
752
void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
  // XMM -> GPR move: 66 [REX.W] 0F 7E /r (REX.W selects movq over movd).
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x7E);
  EmitOperand(src.LowBits(), Operand(dst));
}
761
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  // Scalar single-precision add: F3 0F 58 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
770
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  // Scalar single-precision add from memory: F3 0F 58 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
779
780
void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  // Scalar single-precision subtract: F3 0F 5C /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
789
790
void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  // Scalar single-precision subtract from memory: F3 0F 5C /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
799
800
void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  // Scalar single-precision multiply: F3 0F 59 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
809
810
void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  // Scalar single-precision multiply from memory: F3 0F 59 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
819
820
void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  // Scalar single-precision divide: F3 0F 5E /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
829
830
void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  // Scalar single-precision divide by memory operand: F3 0F 5E /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
839
840
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  // Packed single-precision add: 0F 58 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
848
849
void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  // Packed single-precision subtract: 0F 5C /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
857
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  // Three-operand packed single-precision add (VEX-encoded, opcode 58):
  // dst = add_left + add_right, with add_left carried in VEX.vvvv.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The 2-byte VEX form (C5) cannot encode the REX.B bit add_right would
  // need in the ModRM r/m field.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
886
void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // Three-operand packed single-precision subtract (VEX-encoded, opcode 5C):
  // dst = src1 - src2, with src1 carried in VEX.vvvv.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
  // The 2-byte VEX form (C5) cannot encode the REX.B bit src2 would need.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
911
912
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  // Packed single-precision multiply: 0F 59 /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
920
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  // Three-operand packed single-precision multiply (VEX-encoded, opcode 59):
  // dst = src1 * src2, with src1 carried in VEX.vvvv.
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The 2-byte VEX form (C5) cannot encode the REX.B bit src2 would need.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
949
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  // Packed single-precision divide: 0F 5E /r.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
957
/** VEX.128.0F.WIG 5E /r VDIVPS xmm1, xmm2, xmm3 — non-destructive packed single divide. */
void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX has no B bit; usable only when src2 (r/m) needs no REX extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
986
/** VEX.128.66.0F38.W0 A9 /r VFMADD213SS xmm1, xmm2, xmm3 — acc = left * acc + right (scalar float). */
void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 0F38 escape map requires the three-byte VEX prefix.
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  // 'left' is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  // W=0 selects the single-precision form.
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
1005
/** VEX.128.66.0F38.W1 A9 /r VFMADD213SD xmm1, xmm2, xmm3 — acc = left * acc + right (scalar double). */
void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 0F38 escape map requires the three-byte VEX prefix.
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  // 'left' is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  // W=1 selects the double-precision form (only difference from vfmadd213ss).
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
/** D9 /0 FLD m32fp — push a 32-bit float from memory onto the x87 stack. */
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);  // /0 opcode extension in the ModRM reg field.
}
1029
1030
/** D9 /2 FST m32fp — store ST(0) to memory as a 32-bit float (no pop). */
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);  // /2 opcode extension in the ModRM reg field.
}
1036
1037
/** D9 /3 FSTP m32fp — store ST(0) to memory as a 32-bit float and pop. */
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);  // /3 opcode extension in the ModRM reg field.
}
1043
1044
/** 66 0F 28 /r MOVAPD xmm1, xmm2 — aligned packed-double move; uses VEX form when AVX is available. */
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1057
1058 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
vmovapd(XmmRegister dst,XmmRegister src)1059 void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
1060 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1061 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1062 uint8_t ByteZero, ByteOne, ByteTwo;
1063 bool is_twobyte_form = true;
1064
1065 if (src.NeedsRex() && dst.NeedsRex()) {
1066 is_twobyte_form = false;
1067 }
1068 // Instruction VEX Prefix
1069 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1070 bool load = dst.NeedsRex();
1071 if (is_twobyte_form) {
1072 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1073 bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
1074 ByteOne = EmitVexPrefixByteOne(rex_bit,
1075 vvvv_reg,
1076 SET_VEX_L_128,
1077 SET_VEX_PP_66);
1078 } else {
1079 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1080 /*X=*/ false,
1081 src.NeedsRex(),
1082 SET_VEX_M_0F);
1083 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1084 SET_VEX_L_128,
1085 SET_VEX_PP_66);
1086 }
1087 EmitUint8(ByteZero);
1088 EmitUint8(ByteOne);
1089 if (!is_twobyte_form) {
1090 EmitUint8(ByteTwo);
1091 }
1092 // Instruction Opcode
1093 if (is_twobyte_form && !load) {
1094 EmitUint8(0x29);
1095 } else {
1096 EmitUint8(0x28);
1097 }
1098 // Instruction Operands
1099 if (is_twobyte_form && !load) {
1100 EmitXmmRegisterOperand(src.LowBits(), dst);
1101 } else {
1102 EmitXmmRegisterOperand(dst.LowBits(), src);
1103 }
1104 }
1105
/** 66 0F 28 /r MOVAPD xmm1, m128 — aligned load; uses VEX form when AVX is available. */
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
1118
1119 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so the address's index/base registers must
  // not require REX extension for the short form to be usable.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1159
/** 66 0F 10 /r MOVUPD xmm1, m128 — unaligned load; uses VEX form when AVX is available. */
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1172
1173 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
vmovupd(XmmRegister dst,const Address & src)1174 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1175 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1176 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1177 bool is_twobyte_form = false;
1178 uint8_t ByteZero, ByteOne, ByteTwo;
1179
1180 // Instruction VEX Prefix
1181 uint8_t rex = src.rex();
1182 bool Rex_x = rex & GET_REX_X;
1183 bool Rex_b = rex & GET_REX_B;
1184 if (!Rex_b && !Rex_x) {
1185 is_twobyte_form = true;
1186 }
1187 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1188 if (is_twobyte_form) {
1189 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1190 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1191 vvvv_reg,
1192 SET_VEX_L_128,
1193 SET_VEX_PP_66);
1194 } else {
1195 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1196 Rex_x,
1197 Rex_b,
1198 SET_VEX_M_0F);
1199 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1200 SET_VEX_L_128,
1201 SET_VEX_PP_66);
1202 }
1203 EmitUint8(ByteZero);
1204 EmitUint8(ByteOne);
1205 if (!is_twobyte_form)
1206 EmitUint8(ByteTwo);
1207 // Instruction Opcode
1208 EmitUint8(0x10);
1209 // Instruction Operands
1210 EmitOperand(dst.LowBits(), src);
1211 }
1212
/** 66 0F 29 /r MOVAPD m128, xmm1 — aligned store; uses VEX form when AVX is available. */
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
1225
1226 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
/** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so the address's index/base registers must
  // not require REX extension for the short form to be usable.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1265
/** 66 0F 11 /r MOVUPD m128, xmm1 — unaligned store; uses VEX form when AVX is available. */
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1278
1279 /** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
/** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;

  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so the address's index/base registers must
  // not require REX extension for the short form to be usable.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1319
1320
/** F2 0F 10 /r MOVSD xmm1, m64 — load scalar double from memory. */
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1329
1330
/** F2 0F 11 /r MOVSD m64, xmm1 — store scalar double to memory. */
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1339
1340
/** F2 0F 11 /r MOVSD xmm2, xmm1 — register-to-register scalar double move. */
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}
1349
1350
/** F2 0F 58 /r ADDSD xmm1, xmm2 — scalar double add. */
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1359
1360
/** F2 0F 58 /r ADDSD xmm1, m64 — scalar double add from memory. */
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1369
1370
/** F2 0F 5C /r SUBSD xmm1, xmm2 — scalar double subtract. */
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1379
1380
/** F2 0F 5C /r SUBSD xmm1, m64 — scalar double subtract from memory. */
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1389
1390
/** F2 0F 59 /r MULSD xmm1, xmm2 — scalar double multiply. */
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1399
1400
/** F2 0F 59 /r MULSD xmm1, m64 — scalar double multiply from memory. */
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1409
1410
/** F2 0F 5E /r DIVSD xmm1, xmm2 — scalar double divide. */
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1419
1420
/** F2 0F 5E /r DIVSD xmm1, m64 — scalar double divide by memory operand. */
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1429
1430
/** 66 0F 58 /r ADDPD xmm1, xmm2 — packed double add (SSE2). */
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1439
1440
vaddpd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1441 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1442 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1443 bool is_twobyte_form = false;
1444 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1445 if (!add_right.NeedsRex()) {
1446 is_twobyte_form = true;
1447 }
1448 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1449 X86_64ManagedRegister vvvv_reg =
1450 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1451 if (is_twobyte_form) {
1452 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1453 } else {
1454 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1455 /*X=*/ false,
1456 add_right.NeedsRex(),
1457 SET_VEX_M_0F);
1458 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1459 }
1460 EmitUint8(ByteZero);
1461 EmitUint8(ByteOne);
1462 if (!is_twobyte_form) {
1463 EmitUint8(ByteTwo);
1464 }
1465 EmitUint8(0x58);
1466 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1467 }
1468
1469
/** 66 0F 5C /r SUBPD xmm1, xmm2 — packed double subtract (SSE2). */
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1478
1479
vsubpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1480 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1481 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1482 bool is_twobyte_form = false;
1483 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1484 if (!src2.NeedsRex()) {
1485 is_twobyte_form = true;
1486 }
1487 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1488 X86_64ManagedRegister vvvv_reg =
1489 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1490 if (is_twobyte_form) {
1491 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1492 } else {
1493 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1494 /*X=*/ false,
1495 src2.NeedsRex(),
1496 SET_VEX_M_0F);
1497 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1498 }
1499 EmitUint8(ByteZero);
1500 EmitUint8(ByteOne);
1501 if (!is_twobyte_form) {
1502 EmitUint8(ByteTwo);
1503 }
1504 EmitUint8(0x5C);
1505 EmitXmmRegisterOperand(dst.LowBits(), src2);
1506 }
1507
1508
/** 66 0F 59 /r MULPD xmm1, xmm2 — packed double multiply (SSE2). */
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1517
/** VEX.128.66.0F.WIG 59 /r VMULPD xmm1, xmm2, xmm3 — non-destructive packed double multiply. */
void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX has no B bit; usable only when src2 (r/m) needs no REX extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1546
/** 66 0F 5E /r DIVPD xmm1, xmm2 — packed double divide (SSE2). */
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1555
1556
/** VEX.128.66.0F.WIG 5E /r VDIVPD xmm1, xmm2, xmm3 — non-destructive packed double divide. */
void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX has no B bit; usable only when src2 (r/m) needs no REX extension.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1585
1586
/** 66 0F 6F /r MOVDQA xmm1, xmm2 — aligned integer move; uses VEX form when AVX is available. */
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1599
1600 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Instruction VEX Prefix
  // The two-byte VEX prefix carries only the R bit, so it can extend just one
  // register; fall back to the three-byte form when both registers need REX.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // 'load' chooses between the load (0x6F, dst in reg field) and store
  // (0x7F, src in reg field) opcode forms so that whichever register needs
  // REX ends up in the reg field covered by VEX.R.
  bool load = dst.NeedsRex();
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x7F);  // Store form: src goes in the ModRM reg field.
  } else {
    EmitUint8(0x6F);  // Load form: dst goes in the ModRM reg field.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1647
/** 66 0F 6F /r MOVDQA xmm1, m128 — aligned integer load; uses VEX form when AVX is available. */
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1660
1661 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so the address's index/base registers must
  // not require REX extension for the short form to be usable.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1701
/** F3 0F 6F /r MOVDQU xmm1, m128 — unaligned integer load; uses VEX form when AVX is available. */
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1714
1715 /** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
1716 Load Unaligned */
/** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128 — unaligned integer load. */
void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so the address's index/base registers must
  // not require REX extension for the short form to be usable.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1756
/** 66 0F 7F /r MOVDQA m128, xmm1 — aligned integer store; uses VEX form when AVX is available. */
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1769
1770 /** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
/** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so the address's index/base registers must
  // not require REX extension for the short form to be usable.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1809
/** F3 0F 7F /r MOVDQU m128, xmm1 — unaligned integer store; uses VEX form when AVX is available. */
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x7F);
  EmitOperand(src.LowBits(), dst);
}
1822
1823 /** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
/** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 — unaligned integer store. */
void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so the address's index/base registers must
  // not require REX extension for the short form to be usable.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1863
/** 66 0F FC /r PADDB xmm1, xmm2 — packed byte add (SSE2). */
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1872
1873
/** VEX.128.66.0F.WIG FC /r VPADDB xmm1, xmm2, xmm3 — non-destructive packed byte add. */
void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
  bool is_twobyte_form = true;
  // Two-byte VEX has no B bit; fall back to three-byte form when add_right
  // (r/m) needs REX extension.
  if (add_right.NeedsRex()) {
    is_twobyte_form = false;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1902
1903
/** 66 0F F8 /r PSUBB xmm1, xmm2 — packed byte subtract (SSE2). */
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1912
1913
/** VEX.128.66.0F.WIG F8 /r VPSUBB xmm1, xmm2, xmm3 — non-destructive packed byte subtract. */
void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX has no B bit; usable only when add_right (r/m) needs no REX extension.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // add_left is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1942
1943
paddw(XmmRegister dst,XmmRegister src)1944 void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
1945 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1946 EmitUint8(0x66);
1947 EmitOptionalRex32(dst, src);
1948 EmitUint8(0x0F);
1949 EmitUint8(0xFD);
1950 EmitXmmRegisterOperand(dst.LowBits(), src);
1951 }
1952
vpaddw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1953 void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1954 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1955 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1956 bool is_twobyte_form = false;
1957 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1958 if (!add_right.NeedsRex()) {
1959 is_twobyte_form = true;
1960 }
1961 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1962 X86_64ManagedRegister vvvv_reg =
1963 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1964 if (is_twobyte_form) {
1965 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1966 } else {
1967 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1968 /*X=*/ false,
1969 add_right.NeedsRex(),
1970 SET_VEX_M_0F);
1971 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1972 }
1973 EmitUint8(ByteZero);
1974 EmitUint8(ByteOne);
1975 if (!is_twobyte_form) {
1976 EmitUint8(ByteTwo);
1977 }
1978 EmitUint8(0xFD);
1979 EmitXmmRegisterOperand(dst.LowBits(), add_right);
1980 }
1981
1982
psubw(XmmRegister dst,XmmRegister src)1983 void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
1984 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1985 EmitUint8(0x66);
1986 EmitOptionalRex32(dst, src);
1987 EmitUint8(0x0F);
1988 EmitUint8(0xF9);
1989 EmitXmmRegisterOperand(dst.LowBits(), src);
1990 }
1991
vpsubw(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1992 void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1993 DCHECK(CpuHasAVXorAVX2FeatureFlag());
1994 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1995 bool is_twobyte_form = false;
1996 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1997 if (!add_right.NeedsRex()) {
1998 is_twobyte_form = true;
1999 }
2000 ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2001 X86_64ManagedRegister vvvv_reg =
2002 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2003 if (is_twobyte_form) {
2004 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2005 } else {
2006 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2007 /*X=*/ false,
2008 add_right.NeedsRex(),
2009 SET_VEX_M_0F);
2010 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2011 }
2012 EmitUint8(ByteZero);
2013 EmitUint8(ByteOne);
2014 if (!is_twobyte_form) {
2015 EmitUint8(ByteTwo);
2016 }
2017 EmitUint8(0xF9);
2018 EmitXmmRegisterOperand(dst.LowBits(), add_right);
2019 }
2020
2021
// PMULLW xmm1, xmm2: packed multiply of 8 words, keeping the low 16 bits of
// each product. Encoding: 66 [REX] 0F D5 /r.
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);  // Opcode: PMULLW.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG D5 /r VPMULLW xmm1, xmm2, xmm3/m128 */
// Three-operand AVX low-word multiply: dst = src1 * src2 (low 16 bits).
void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode the B bit; usable only when src2 needs no REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xD5);  // Opcode: VPMULLW.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2059
// PADDD xmm1, xmm2: packed add of 4 doublewords. Encoding: 66 [REX] 0F FE /r.
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);  // Opcode: PADDD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG FE /r VPADDD xmm1, xmm2, xmm3/m128 */
// Three-operand AVX doubleword add: dst = add_left + add_right.
void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX is only possible when add_right (the rm operand) needs no REX.B.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFE);  // Opcode: VPADDD.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}

// PSUBD xmm1, xmm2: packed subtract of 4 doublewords. Encoding: 66 [REX] 0F FA /r.
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);  // Opcode: PSUBD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2106
2107
// PMULLD xmm1, xmm2 (SSE4.1): packed multiply of 4 doublewords, keeping the
// low 32 bits of each product. Encoding: 66 [REX] 0F 38 40 /r.
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);  // Three-byte opcode map 0F 38.
  EmitUint8(0x40);  // Opcode: PMULLD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F38.WIG 40 /r VPMULLD xmm1, xmm2, xmm3/m128 */
// Three-operand AVX low-doubleword multiply. The 0F 38 opcode map can only be
// selected by the three-byte VEX prefix, so the two-byte form is never used.
void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                 /*X=*/ false,
                                 src2.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0x40);  // Opcode: VPMULLD.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2136
// PADDQ xmm1, xmm2: packed add of 2 quadwords. Encoding: 66 [REX] 0F D4 /r.
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);  // Opcode: PADDQ.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


/* VEX.128.66.0F.WIG D4 /r VPADDQ xmm1, xmm2, xmm3/m128 */
// Three-operand AVX quadword add: dst = add_left + add_right.
void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX is only possible when add_right (the rm operand) needs no REX.B.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xD4);  // Opcode: VPADDQ.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}


// PSUBQ xmm1, xmm2: packed subtract of 2 quadwords. Encoding: 66 [REX] 0F FB /r.
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);  // Opcode: PSUBQ.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/* VEX.128.66.0F.WIG FB /r VPSUBQ xmm1, xmm2, xmm3/m128 */
// Three-operand AVX quadword subtract: dst = add_left - add_right.
void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX is only possible when add_right (the rm operand) needs no REX.B.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFB);  // Opcode: VPSUBQ.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2214
2215
// PADDUSB xmm1, xmm2: packed add of 16 unsigned bytes with unsigned
// saturation. Encoding: 66 [REX] 0F DC /r.
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);  // Opcode: PADDUSB.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PADDSB xmm1, xmm2: packed add of 16 signed bytes with signed saturation.
// Encoding: 66 [REX] 0F EC /r.
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);  // Opcode: PADDSB.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PADDUSW xmm1, xmm2: packed add of 8 unsigned words with unsigned
// saturation. Encoding: 66 [REX] 0F DD /r.
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);  // Opcode: PADDUSW.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PADDSW xmm1, xmm2: packed add of 8 signed words with signed saturation.
// Encoding: 66 [REX] 0F ED /r.
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);  // Opcode: PADDSW.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PSUBUSB xmm1, xmm2: packed subtract of 16 unsigned bytes with unsigned
// saturation. Encoding: 66 [REX] 0F D8 /r.
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);  // Opcode: PSUBUSB.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PSUBSB xmm1, xmm2: packed subtract of 16 signed bytes with signed
// saturation. Encoding: 66 [REX] 0F E8 /r.
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);  // Opcode: PSUBSB.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2274
2275
/* VEX.128.66.0F.WIG FA /r VPSUBD xmm1, xmm2, xmm3/m128 */
// Three-operand AVX doubleword subtract: dst = add_left - add_right.
void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode the B bit; usable only when the rm
  // operand (add_right) does not need REX.B.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFA);  // Opcode: VPSUBD.
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2304
2305
// PSUBUSW xmm1, xmm2: packed subtract of 8 unsigned words with unsigned
// saturation. Encoding: 66 [REX] 0F D9 /r.
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);  // Opcode: PSUBUSW.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// PSUBSW xmm1, xmm2: packed subtract of 8 signed words with signed
// saturation. Encoding: 66 [REX] 0F E9 /r.
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);  // Opcode: PSUBSW.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2324
2325
// CVTSI2SS xmm, r32: convert a 32-bit signed integer to single-precision
// float. Delegates to the sized overload with is64bit=false.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
  cvtsi2ss(dst, src, false);
}


// CVTSI2SS xmm, r32/r64: convert a signed integer register to
// single-precision float. Encoding: F3 [REX.W] 0F 2A /r.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the scalar-single form.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode: CVTSI2SS.
  EmitOperand(dst.LowBits(), Operand(src));
}


// CVTSI2SS xmm, m32/m64: memory-source variant of the conversion above.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode: CVTSI2SS.
  EmitOperand(dst.LowBits(), src);
}


// CVTSI2SD xmm, r32: convert a 32-bit signed integer to double-precision
// float. Delegates to the sized overload with is64bit=false.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
  cvtsi2sd(dst, src, false);
}


// CVTSI2SD xmm, r32/r64: convert a signed integer register to
// double-precision float. Encoding: F2 [REX.W] 0F 2A /r.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar-double form.
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode: CVTSI2SD.
  EmitOperand(dst.LowBits(), Operand(src));
}


// CVTSI2SD xmm, m32/m64: memory-source variant of the conversion above.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);  // Opcode: CVTSI2SD.
  EmitOperand(dst.LowBits(), src);
}
2394
2395
// CVTSS2SI r32, xmm: convert single-precision float to a 32-bit signed
// integer using the current rounding mode. Encoding: F3 [REX] 0F 2D /r.
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);  // Opcode: CVTSS2SI.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// CVTSS2SD xmm1, xmm2: convert single-precision to double-precision float.
// Encoding: F3 [REX] 0F 5A /r.
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode: CVTSS2SD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// CVTSS2SD xmm, m32: memory-source variant of the conversion above.
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode: CVTSS2SD.
  EmitOperand(dst.LowBits(), src);
}


// CVTSD2SI r32, xmm: convert double-precision float to a 32-bit signed
// integer using the current rounding mode. Encoding: F2 [REX] 0F 2D /r.
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);  // Opcode: CVTSD2SI.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2434
2435
// CVTTSS2SI r32, xmm: truncating single-to-integer conversion (32-bit
// result). Delegates to the sized overload with is64bit=false.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
  cvttss2si(dst, src, false);
}


// CVTTSS2SI r32/r64, xmm: convert single-precision float to signed integer
// with truncation. Encoding: F3 [REX.W] 0F 2C /r.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);  // Opcode: CVTTSS2SI.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// CVTTSD2SI r32, xmm: truncating double-to-integer conversion (32-bit
// result). Delegates to the sized overload with is64bit=false.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
  cvttsd2si(dst, src, false);
}


// CVTTSD2SI r32/r64, xmm: convert double-precision float to signed integer
// with truncation. Encoding: F2 [REX.W] 0F 2C /r.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);  // Opcode: CVTTSD2SI.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2474
2475
// CVTSD2SS xmm1, xmm2: convert double-precision to single-precision float.
// Encoding: F2 [REX] 0F 5A /r.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode: CVTSD2SS.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// CVTSD2SS xmm, m64: memory-source variant of the conversion above.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);  // Opcode: CVTSD2SS.
  EmitOperand(dst.LowBits(), src);
}


// CVTDQ2PS xmm1, xmm2: convert 4 packed signed doublewords to 4 packed
// single-precision floats. Encoding: [REX] 0F 5B /r (no mandatory prefix).
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);  // Opcode: CVTDQ2PS.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// CVTDQ2PD xmm1, xmm2: convert 2 packed signed doublewords to 2 packed
// double-precision floats. Encoding: F3 [REX] 0F E6 /r.
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);  // Opcode: CVTDQ2PD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2513
2514
// COMISS xmm1, xmm2: ordered compare of scalar singles, setting EFLAGS.
// Encoding: [REX] 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // Opcode: COMISS.
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// COMISS xmm, m32: memory-source variant of the compare above.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // Opcode: COMISS.
  EmitOperand(a.LowBits(), b);
}


// COMISD xmm1, xmm2: ordered compare of scalar doubles, setting EFLAGS.
// Encoding: 66 [REX] 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // Opcode: COMISD.
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// COMISD xmm, m64: memory-source variant of the compare above.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);  // Opcode: COMISD.
  EmitOperand(a.LowBits(), b);
}


// UCOMISS xmm1, xmm2: unordered (quiet-NaN tolerant) compare of scalar
// singles, setting EFLAGS. Encoding: [REX] 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode: UCOMISS.
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// UCOMISS xmm, m32: memory-source variant of the compare above.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode: UCOMISS.
  EmitOperand(a.LowBits(), b);
}


// UCOMISD xmm1, xmm2: unordered compare of scalar doubles, setting EFLAGS.
// Encoding: 66 [REX] 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode: UCOMISD.
  EmitXmmRegisterOperand(a.LowBits(), b);
}


// UCOMISD xmm, m64: memory-source variant of the compare above.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);  // Opcode: UCOMISD.
  EmitOperand(a.LowBits(), b);
}
2589
2590
// ROUNDSD xmm1, xmm2, imm8 (SSE4.1): round scalar double per the immediate
// rounding-control byte. Encoding: 66 [REX] 0F 3A 0B /r ib.
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);  // Three-byte opcode map 0F 3A.
  EmitUint8(0x0B);  // Opcode: ROUNDSD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate.
}


// ROUNDSS xmm1, xmm2, imm8 (SSE4.1): round scalar single per the immediate
// rounding-control byte. Encoding: 66 [REX] 0F 3A 0A /r ib.
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);  // Three-byte opcode map 0F 3A.
  EmitUint8(0x0A);  // Opcode: ROUNDSS.
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Rounding-control immediate.
}
2613
2614
// SQRTSD xmm1, xmm2: scalar double-precision square root.
// Encoding: F2 [REX] 0F 51 /r.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);  // Opcode: SQRTSD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// SQRTSS xmm1, xmm2: scalar single-precision square root.
// Encoding: F3 [REX] 0F 51 /r.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);  // Opcode: SQRTSS.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2633
2634
// XORPD xmm, m128: bitwise XOR of packed doubles with a memory operand.
// Encoding: 66 [REX] 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode: XORPD.
  EmitOperand(dst.LowBits(), src);
}


// XORPD xmm1, xmm2: register-source variant of the XOR above.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode: XORPD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// XORPS xmm, m128: bitwise XOR of packed singles with a memory operand.
// Encoding: [REX] 0F 57 /r (no mandatory prefix).
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode: XORPS.
  EmitOperand(dst.LowBits(), src);
}


// XORPS xmm1, xmm2: register-source variant of the XOR above.
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);  // Opcode: XORPS.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PXOR xmm1, xmm2: bitwise XOR of the full 128-bit registers (integer form).
// Encoding: 66 [REX] 0F EF /r.
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);  // Opcode: PXOR.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2680
2681 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
/* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
// Three-operand AVX bitwise XOR (integer form): dst = src1 ^ src2.
void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode the B bit; usable only when src2 (the rm
  // operand) does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEF);  // Opcode: VPXOR.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

/* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
// Three-operand AVX bitwise XOR of packed singles (no mandatory prefix:
// SET_VEX_PP_NONE).
void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX is only possible when src2 (the rm operand) needs no REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x57);  // Opcode: VXORPS.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

/* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
// Three-operand AVX bitwise XOR of packed doubles (66 mandatory prefix).
void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX is only possible when src2 (the rm operand) needs no REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x57);  // Opcode: VXORPD.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2770
// ANDPD xmm, m128: bitwise AND of packed doubles with a memory operand.
// Encoding: 66 [REX] 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode: ANDPD.
  EmitOperand(dst.LowBits(), src);
}

// ANDPD xmm1, xmm2: register-source variant of the AND above.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode: ANDPD.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// ANDPS xmm1, xmm2: bitwise AND of packed singles.
// Encoding: [REX] 0F 54 /r (no mandatory prefix).
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);  // Opcode: ANDPS.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// PAND xmm1, xmm2: bitwise AND of the full 128-bit registers (integer form).
// Encoding: 66 [REX] 0F DB /r.
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);  // Opcode: PAND.
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2805
2806 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
/* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
// Three-operand AVX bitwise AND (integer form): dst = src1 & src2.
void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX (C5) cannot encode the B bit; usable only when src2 (the rm
  // operand) does not need REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xDB);  // Opcode: VPAND.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2835
/* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x54);  // Same opcode as ANDPS; PP_NONE selects the "ps" form.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2865
/* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x54);  // Same opcode as ANDPD; PP_66 selects the "pd" form.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2895
// ANDN r64, r64, r64 — dst = ~src1 & src2 (BMI1).
// Encoding: VEX.0F38.W1 F2 /r; always emitted with the 3-byte VEX prefix.
// NOTE(review): unlike the v* SIMD emitters above, there is no feature-flag
// DCHECK here — presumably callers guarantee BMI1 availability; confirm.
void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                          /*X=*/ false,
                                          src2.NeedsRex(),
                                          SET_VEX_M_0F_38);
  // W=1 selects the 64-bit operand size; src1 travels in VEX.vvvv.
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  // Opcode field
  EmitUint8(0xF2);
  EmitRegisterOperand(dst.LowBits(), src2.LowBits());
}
2914
// ANDNPD xmm, xmm — dst = ~dst & src on packed doubles. Encoding: 66 0F 55 /r.
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2923
// ANDNPS xmm, xmm — dst = ~dst & src on packed singles. Encoding: 0F 55 /r.
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2931
// PANDN xmm, xmm — dst = ~dst & src over all 128 bits. Encoding: 66 0F DF /r.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2940
/* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xDF);  // VPANDN opcode byte.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2970
/* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x55);  // Same opcode as ANDNPS; PP_NONE selects the "ps" form.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3000
/* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x55);  // Same opcode as ANDNPD; PP_66 selects the "pd" form.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3030
// ORPD xmm, xmm — bitwise OR of packed doubles. Encoding: 66 0F 56 /r.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3039
// ORPS xmm, xmm — bitwise OR of packed singles. Encoding: 0F 56 /r.
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3047
// POR xmm, xmm — bitwise OR of the full 128-bit registers.
// Encoding: 66 0F EB /r.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3056
/* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEB);  // VPOR opcode byte.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3086
/* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // Same opcode as ORPS; PP_NONE selects the "ps" form.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3116
/* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // Same opcode as ORPD; PP_66 selects the "pd" form.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3146
// PAVGB xmm, xmm — rounded average of packed unsigned bytes.
// Encoding: 66 0F E0 /r.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3155
// PAVGW xmm, xmm — rounded average of packed unsigned words.
// Encoding: 66 0F E3 /r.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3164
// PSADBW xmm, xmm — sum of absolute differences of packed unsigned bytes.
// Encoding: 66 0F F6 /r.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3173
// PMADDWD xmm, xmm — multiply packed words, add adjacent dword pairs.
// Encoding: 66 0F F5 /r.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3182
/* VEX.128.66.0F.WIG F5 /r VPMADDWD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX prefix is only usable when src2 (ModRM r/m) needs no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 is the second source, carried in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF5);  // VPMADDWD opcode byte.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3211
// PHADDW xmm, xmm — horizontal add of packed words.
// Encoding: 66 0F 38 01 /r (three-byte opcode map).
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3221
// PHADDD xmm, xmm — horizontal add of packed dwords.
// Encoding: 66 0F 38 02 /r.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3231
// HADDPS xmm, xmm — horizontal add of packed singles. Encoding: F2 0F 7C /r.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // F2 (not 66) selects the "ps" form of 0F 7C.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3240
// HADDPD xmm, xmm — horizontal add of packed doubles. Encoding: 66 0F 7C /r.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3249
// PHSUBW xmm, xmm — horizontal subtract of packed words.
// Encoding: 66 0F 38 05 /r.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3259
// PHSUBD xmm, xmm — horizontal subtract of packed dwords.
// Encoding: 66 0F 38 06 /r.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3269
// HSUBPS xmm, xmm — horizontal subtract of packed singles.
// Encoding: F2 0F 7D /r.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // F2 (not 66) selects the "ps" form of 0F 7D.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3278
// HSUBPD xmm, xmm — horizontal subtract of packed doubles.
// Encoding: 66 0F 7D /r.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3287
// PMINSB xmm, xmm — minimum of packed signed bytes. Encoding: 66 0F 38 38 /r.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3297
// PMAXSB xmm, xmm — maximum of packed signed bytes. Encoding: 66 0F 38 3C /r.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3307
// PMINSW xmm, xmm — minimum of packed signed words. Encoding: 66 0F EA /r.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3316
// PMAXSW xmm, xmm — maximum of packed signed words. Encoding: 66 0F EE /r.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3325
// PMINSD xmm, xmm — minimum of packed signed dwords. Encoding: 66 0F 38 39 /r.
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3335
// PMAXSD xmm, xmm — maximum of packed signed dwords. Encoding: 66 0F 38 3D /r.
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3345
// PMINUB xmm, xmm — minimum of packed unsigned bytes. Encoding: 66 0F DA /r.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3354
// PMAXUB xmm, xmm — maximum of packed unsigned bytes. Encoding: 66 0F DE /r.
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3363
// PMINUW xmm, xmm — minimum of packed unsigned words. Encoding: 66 0F 38 3A /r.
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3373
// PMAXUW xmm, xmm — maximum of packed unsigned words. Encoding: 66 0F 38 3E /r.
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3383
// PMINUD xmm, xmm — minimum of packed unsigned dwords.
// Encoding: 66 0F 38 3B /r.
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3393
// PMAXUD xmm, xmm — maximum of packed unsigned dwords.
// Encoding: 66 0F 38 3F /r.
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3403
// MINPS xmm, xmm — minimum of packed singles. Encoding: 0F 5D /r.
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3411
// MAXPS xmm, xmm — maximum of packed singles. Encoding: 0F 5F /r.
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3419
// MINPD xmm, xmm — minimum of packed doubles. Encoding: 66 0F 5D /r.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3428
// MAXPD xmm, xmm — maximum of packed doubles. Encoding: 66 0F 5F /r.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3437
// PCMPEQB xmm, xmm — compare packed bytes for equality. Encoding: 66 0F 74 /r.
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3446
// PCMPEQW xmm, xmm — compare packed words for equality. Encoding: 66 0F 75 /r.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3455
// PCMPEQD xmm, xmm — compare packed dwords for equality. Encoding: 66 0F 76 /r.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3464
// PCMPEQQ xmm, xmm — compare packed qwords for equality.
// Encoding: 66 0F 38 29 /r.
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3474
// PCMPGTB xmm, xmm — signed greater-than compare of packed bytes.
// Encoding: 66 0F 64 /r.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3483
// PCMPGTW xmm, xmm — signed greater-than compare of packed words.
// Encoding: 66 0F 65 /r.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3492
// PCMPGTD xmm, xmm — signed greater-than compare of packed dwords.
// Encoding: 66 0F 66 /r.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3501
// PCMPGTQ xmm, xmm — signed greater-than compare of packed qwords.
// Encoding: 66 0F 38 37 /r.
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3511
shufpd(XmmRegister dst,XmmRegister src,const Immediate & imm)3512 void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3513 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3514 EmitUint8(0x66);
3515 EmitOptionalRex32(dst, src);
3516 EmitUint8(0x0F);
3517 EmitUint8(0xC6);
3518 EmitXmmRegisterOperand(dst.LowBits(), src);
3519 EmitUint8(imm.value());
3520 }
3521
3522
shufps(XmmRegister dst,XmmRegister src,const Immediate & imm)3523 void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3524 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3525 EmitOptionalRex32(dst, src);
3526 EmitUint8(0x0F);
3527 EmitUint8(0xC6);
3528 EmitXmmRegisterOperand(dst.LowBits(), src);
3529 EmitUint8(imm.value());
3530 }
3531
3532
pshufd(XmmRegister dst,XmmRegister src,const Immediate & imm)3533 void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3534 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3535 EmitUint8(0x66);
3536 EmitOptionalRex32(dst, src);
3537 EmitUint8(0x0F);
3538 EmitUint8(0x70);
3539 EmitXmmRegisterOperand(dst.LowBits(), src);
3540 EmitUint8(imm.value());
3541 }
3542
3543
// PUNPCKLBW xmm, xmm — interleave low bytes. Encoding: 66 0F 60 /r.
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3552
3553
// PUNPCKLWD xmm, xmm — interleave low words. Encoding: 66 0F 61 /r.
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3562
3563
// PUNPCKLDQ xmm, xmm — interleave low dwords. Encoding: 66 0F 62 /r.
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3572
3573
// PUNPCKLQDQ xmm, xmm — interleave low qwords. Encoding: 66 0F 6C /r.
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3582
3583
// PUNPCKHBW xmm, xmm — interleave high bytes. Encoding: 66 0F 68 /r.
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3592
3593
// PUNPCKHWD xmm, xmm — interleave high words. Encoding: 66 0F 69 /r.
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3602
3603
// PUNPCKHDQ xmm, xmm — interleave high dwords. Encoding: 66 0F 6A /r.
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3612
3613
// PUNPCKHQDQ xmm, xmm — interleave high qwords. Encoding: 66 0F 6D /r.
void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3622
3623
// PSLLW xmm, imm8 — shift packed words left. Encoding: 66 0F 71 /6 ib.
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // Only REX.B can be needed: reg sits in the ModRM r/m field.
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);  // /6: opcode extension in ModRM reg field.
  EmitUint8(shift_count.value());
}
3634
3635
// PSLLD xmm, imm8 — shift packed dwords left. Encoding: 66 0F 72 /6 ib.
void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);  // /6: opcode extension in ModRM reg field.
  EmitUint8(shift_count.value());
}
3646
3647
// PSLLQ xmm, imm8 — shift packed qwords left. Encoding: 66 0F 73 /6 ib.
void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);  // /6: opcode extension in ModRM reg field.
  EmitUint8(shift_count.value());
}
3658
3659
// PSRAW xmm, imm8 — arithmetic right shift of packed words.
// Encoding: 66 0F 71 /4 ib.
void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);  // /4: opcode extension in ModRM reg field.
  EmitUint8(shift_count.value());
}
3670
3671
// PSRAD xmm, imm8 — arithmetic right shift of packed dwords.
// Encoding: 66 0F 72 /4 ib.
void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);  // /4: opcode extension in ModRM reg field.
  EmitUint8(shift_count.value());
}
3682
3683
// psrlw xmm, imm8 — logical right shift of packed 16-bit words.
// Encoding: 66 [REX] 0F 71 /2 ib.
void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);  // /2 selects PSRLW.
  EmitUint8(shift_count.value());
}
3694
3695
// psrld xmm, imm8 — logical right shift of packed 32-bit dwords.
// Encoding: 66 [REX] 0F 72 /2 ib.
void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);  // /2 selects PSRLD.
  EmitUint8(shift_count.value());
}
3706
3707
// psrlq xmm, imm8 — logical right shift of packed 64-bit qwords.
// Encoding: 66 [REX] 0F 73 /2 ib.
void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);  // /2 selects PSRLQ.
  EmitUint8(shift_count.value());
}
3718
3719
// psrldq xmm, imm8 — shift the whole 128-bit register right by imm8 *bytes*.
// Encoding: 66 [REX] 0F 73 /3 ib.
void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);  // /3 selects PSRLDQ (byte shift), not PSRLQ (/2).
  EmitUint8(shift_count.value());
}
3730
3731
// fldl m64 — push a 64-bit float from `src` onto the x87 stack (DD /0).
void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}
3737
3738
// fstl m64 — store ST(0) to `dst` as a 64-bit float without popping (DD /2).
void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}
3744
3745
// fstpl m64 — store ST(0) to `dst` as a 64-bit float and pop (DD /3).
void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}
3751
3752
// fstsw — store the x87 status word into AX.
// 9B is the WAIT prefix; DF E0 is FNSTSW AX; together they form FSTSW AX.
void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}
3759
3760
// fnstcw m16 — store the x87 control word to `dst` without checking exceptions (D9 /7).
void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}
3766
3767
// fldcw m16 — load the x87 control word from `src` (D9 /5).
void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}
3773
3774
// fistpl m64 — store ST(0) to `dst` as a 64-bit integer and pop (DF /7).
void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}
3780
3781
// fistps m32 — store ST(0) to `dst` as a 32-bit integer and pop (DB /3).
void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}
3787
3788
// fildl m64 — push a 64-bit integer from `src` onto the x87 stack (DF /5).
void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}
3794
3795
// filds m32 — push a 32-bit integer from `src` onto the x87 stack (DB /0).
void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}
3801
3802
// fincstp — increment the x87 stack-top pointer (D9 F7).
void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}
3808
3809
ffree(const Immediate & index)3810 void X86_64Assembler::ffree(const Immediate& index) {
3811 CHECK_LT(index.value(), 7);
3812 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3813 EmitUint8(0xDD);
3814 EmitUint8(0xC0 + index.value());
3815 }
3816
3817
// fsin — replace ST(0) with its sine (D9 FE).
void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}
3823
3824
// fcos — replace ST(0) with its cosine (D9 FF).
void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}
3830
3831
// fptan — partial tangent of ST(0) (D9 F2).
void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}
3837
// fucompp — unordered compare ST(0) with ST(1), then pop both (DA E9).
void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}
3843
3844
// fprem — partial remainder of ST(0) / ST(1) (D9 F8).
void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}
3850
3851
try_xchg_rax(CpuRegister dst,CpuRegister src,void (X86_64Assembler::* prefix_fn)(CpuRegister))3852 bool X86_64Assembler::try_xchg_rax(CpuRegister dst,
3853 CpuRegister src,
3854 void (X86_64Assembler::*prefix_fn)(CpuRegister)) {
3855 Register src_reg = src.AsRegister();
3856 Register dst_reg = dst.AsRegister();
3857 if (src_reg != RAX && dst_reg != RAX) {
3858 return false;
3859 }
3860 if (dst_reg == RAX) {
3861 std::swap(src_reg, dst_reg);
3862 }
3863 if (dst_reg != RAX) {
3864 // Prefix is needed only if one of the registers is not RAX, otherwise it's a pure NOP.
3865 (this->*prefix_fn)(CpuRegister(dst_reg));
3866 }
3867 EmitUint8(0x90 + CpuRegister(dst_reg).LowBits());
3868 return true;
3869 }
3870
3871
// xchgb r8, r8 — exchange byte registers (86 /r).
void X86_64Assembler::xchgb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is no short version for AL.
  // Both operands are byte registers, so both may need REX normalization (SPL/BPL/SIL/DIL).
  EmitOptionalByteRegNormalizingRex32(dst, src, /*normalize_both=*/ true);
  EmitUint8(0x86);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3879
3880
// xchgb r8, m8 — exchange a byte register with memory (86 /r).
void X86_64Assembler::xchgb(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x86);
  EmitOperand(reg.LowBits(), address);
}
3887
3888
// xchgw r16, r16 — exchange word registers (66 87 /r, or 66 90+r via AX).
void X86_64Assembler::xchgw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 66 prefix must precede REX, so emit it before try_xchg_rax emits anything.
  EmitOperandSizeOverride();
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for AX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3901
3902
// xchgw r16, m16 — exchange a word register with memory (66 87 /r).
void X86_64Assembler::xchgw(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3910
3911
// xchgl r32, r32 — exchange dword registers (87 /r, or 90+r via EAX).
void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for EAX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3923
3924
// xchgl r32, m32 — exchange a dword register with memory (87 /r).
void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3931
3932
// xchgq r64, r64 — exchange qword registers (REX.W 87 /r, or REX.W 90+r via RAX).
void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitRex64)) {
    // A short version for RAX.
    return;
  }
  // General case.
  EmitRex64(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
3944
3945
// xchgq r64, m64 — exchange a qword register with memory (REX.W 87 /r).
void X86_64Assembler::xchgq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3952
3953
// xaddb r8, r8 — exchange-and-add byte registers (0F C0 /r); `src` is the ModRM reg field.
void X86_64Assembler::xaddb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst, /*normalize_both=*/ true);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
3961
3962
// xaddb m8, r8 — exchange-and-add a byte register into memory (0F C0 /r).
void X86_64Assembler::xaddb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitOperand(reg.LowBits(), address);
}
3970
3971
// xaddw r16, r16 — exchange-and-add word registers (66 0F C1 /r).
void X86_64Assembler::xaddw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(src, dst);  // `src` goes in the ModRM reg field.
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
3980
3981
// xaddw m16, r16 — exchange-and-add a word register into memory (66 0F C1 /r).
void X86_64Assembler::xaddw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
3990
3991
// xaddl r32, r32 — exchange-and-add dword registers (0F C1 /r).
void X86_64Assembler::xaddl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);  // `src` goes in the ModRM reg field.
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
3999
4000
// xaddl m32, r32 — exchange-and-add a dword register into memory (0F C1 /r).
void X86_64Assembler::xaddl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
4008
4009
// xaddq r64, r64 — exchange-and-add qword registers (REX.W 0F C1 /r).
void X86_64Assembler::xaddq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);  // `src` goes in the ModRM reg field.
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
4017
4018
// xaddq m64, r64 — exchange-and-add a qword register into memory (REX.W 0F C1 /r).
void X86_64Assembler::xaddq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
4026
4027
// cmpb m8, imm8 — compare a byte in memory with an immediate (80 /7 ib).
// NOTE: the CHECK admits any int32; only the low byte of `imm` is emitted.
void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);  // /7 selects CMP in the immediate group.
  EmitUint8(imm.value() & 0xFF);
}
4036
4037
// cmpw m16, imm — compare a word in memory with an immediate (66 prefix, /7 group).
void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();  // 66 prefix must precede REX.
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}
4045
4046
// cmpl r32, imm — compare a dword register with an immediate (/7 group; EmitComplex
// picks the short 83 /7 ib form for int8 immediates).
void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}
4053
4054
// cmpl r32, r32 — compare dword registers (3B /r: reg0 in reg field, reg1 in r/m).
void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}
4061
4062
// cmpl r32, m32 — compare a dword register with memory (3B /r).
void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}
4069
4070
// cmpl m32, r32 — compare memory with a dword register (39 /r, operands reversed vs 3B).
void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}
4077
4078
// cmpl m32, imm — compare memory with an immediate (/7 group).
void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}
4085
4086
// cmpq r64, r64 — compare qword registers (REX.W 3B /r).
void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}
4093
4094
// cmpq r64, imm32 — compare a qword register with a sign-extended immediate (/7 group).
void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}
4101
4102
// cmpq r64, m64 — compare a qword register with memory (REX.W 3B /r).
void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}
4109
4110
// cmpq m64, imm32 — compare memory with a sign-extended immediate (/7 group).
void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}
4117
4118
// addl r32, r32 — dst += src (03 /r).
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4125
4126
// addl r32, m32 — reg += memory (03 /r).
void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}
4133
4134
// testl r32, r32 — AND the registers and set flags, discarding the result (85 /r).
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}
4141
4142
// testl r32, m32 — AND register with memory and set flags (85 /r).
void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}
4149
4150
// testl r32, imm — three encodings, smallest first:
//   A8 ib / F6 /0 ib  for a uint8 immediate against a register with a legacy
//                     byte form (AL/CL/DL/BL, i.e. register number < 4),
//   A9 id             short form for EAX,
//   F7 /0 id          general case.
void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
  // we only test the byte CpuRegister to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);
    } else {
      EmitUint8(0xF6);
      EmitUint8(0xC0 + reg.AsRegister());  // mod=11 with /0 extension folded in.
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);
    EmitImmediate(immediate);
  } else {
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);
    EmitOperand(0, Operand(reg));  // /0 selects TEST.
    EmitImmediate(immediate);
  }
}
4175
4176
// testq r64, r64 — AND the qword registers and set flags (REX.W 85 /r).
void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}
4183
4184
// testq r64, m64 — AND a qword register with memory and set flags (REX.W 85 /r).
void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}
4191
4192
testb(const Address & dst,const Immediate & imm)4193 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
4194 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4195 EmitOptionalRex32(dst);
4196 EmitUint8(0xF6);
4197 EmitOperand(Register::RAX, dst);
4198 CHECK(imm.is_int8());
4199 EmitUint8(imm.value() & 0xFF);
4200 }
4201
4202
// testl m32, imm32 — AND a dword in memory with an immediate and set flags (F7 /0 id).
void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF7);
  EmitOperand(0, dst);  // /0 selects TEST.
  EmitImmediate(imm);
}
4210
4211
// andl r32, r32 — dst &= src (23 /r).
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}
4218
4219
// andl r32, m32 — reg &= memory (23 /r).
void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}
4226
4227
// andl r32, imm — dst &= immediate (/4 in the immediate group).
void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);
}
4233
4234
// andq r64, imm32 — reg &= sign-extended immediate (/4 group).
void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}
4241
4242
// andq r64, r64 — dst &= src (REX.W 23 /r).
void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}
4249
4250
// andq r64, m64 — dst &= memory (REX.W 23 /r).
void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}
4257
4258
// andw m16, imm — AND a word in memory with an immediate (66 prefix, /4 group).
void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Accept either signed or unsigned 16-bit values.
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);  // Operand-size override; must precede REX.
  EmitOptionalRex32(address);
  EmitComplex(4, address, imm, /* is_16_op= */ true);
}
4266
4267
// orl r32, r32 — dst |= src (0B /r).
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}
4274
4275
// orl r32, m32 — reg |= memory (0B /r).
void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}
4282
4283
// orl r32, imm — dst |= immediate (/1 in the immediate group).
void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);
}
4289
4290
// orq r64, imm32 — dst |= sign-extended immediate (/1 group).
void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}
4297
4298
// orq r64, r64 — dst |= src (REX.W 0B /r).
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}
4305
4306
// orq r64, m64 — dst |= memory (REX.W 0B /r).
void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}
4313
4314
// xorl r32, r32 — dst ^= src (33 /r).
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}
4321
4322
// xorl r32, m32 — reg ^= memory (33 /r).
void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}
4329
4330
// xorl r32, imm — dst ^= immediate (/6 in the immediate group).
void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);
}
4336
4337
// xorq r64, r64 — dst ^= src (REX.W 33 /r).
void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}
4344
4345
// xorq r64, imm32 — dst ^= sign-extended immediate (/6 group).
void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}
4352
// xorq r64, m64 — dst ^= memory (REX.W 33 /r).
void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}
4359
4360
4361 #if 0
4362 void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
4363 // REX.WRXB
4364 // W - 64-bit operand
4365 // R - MODRM.reg
4366 // X - SIB.index
4367 // B - MODRM.rm/SIB.base
4368 uint8_t rex = force ? 0x40 : 0;
4369 if (w) {
4370 rex |= 0x48; // REX.W000
4371 }
4372 if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
4373 rex |= 0x44; // REX.0R00
4374 *r = static_cast<Register>(*r - 8);
4375 }
4376 if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
4377 rex |= 0x42; // REX.00X0
4378 *x = static_cast<Register>(*x - 8);
4379 }
4380 if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
4381 rex |= 0x41; // REX.000B
4382 *b = static_cast<Register>(*b - 8);
4383 }
4384 if (rex != 0) {
4385 EmitUint8(rex);
4386 }
4387 }
4388
4389 void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
4390 // REX.WRXB
4391 // W - 64-bit operand
4392 // R - MODRM.reg
4393 // X - SIB.index
4394 // B - MODRM.rm/SIB.base
4395 uint8_t rex = mem->rex();
4396 if (force) {
4397 rex |= 0x40; // REX.0000
4398 }
4399 if (w) {
4400 rex |= 0x48; // REX.W000
4401 }
4402 if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
4403 rex |= 0x44; // REX.0R00
4404 *dst = static_cast<Register>(*dst - 8);
4405 }
4406 if (rex != 0) {
4407 EmitUint8(rex);
4408 }
4409 }
4410
4411 void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
4412 #endif
4413
// addl r32, imm — reg += immediate (/0 in the immediate group).
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}
4419
4420
// addq r64, imm32 — reg += sign-extended immediate (/0 group).
void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}
4427
4428
// addq r64, m64 — dst += memory (REX.W 03 /r).
void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}
4435
4436
// addq r64, r64 — dst += src, encoded via the 01 form.
void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}
4444
4445
// addl m32, r32 — memory += reg (01 /r).
void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}
4452
4453
// addl m32, imm — memory += immediate (/0 group).
void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}
4459
4460
// addw m16, imm — add an immediate to a word in memory (66 prefix, /0 group).
void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Accept either signed or unsigned 16-bit values.
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);  // Operand-size override; must precede REX.
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}
4468
4469
// subl r32, r32 — dst -= src (2B /r).
void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);
  EmitOperand(dst.LowBits(), Operand(src));
}
4476
4477
// subl r32, imm — reg -= immediate (/5 in the immediate group).
void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);
}
4483
4484
// subq r64, imm32 — reg -= sign-extended immediate (/5 group).
void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);
}
4491
4492
// subq r64, r64 — dst -= src (REX.W 2B /r).
void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4499
4500
subq(CpuRegister reg,const Address & address)4501 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4502 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4503 EmitRex64(reg, address);
4504 EmitUint8(0x2B);
4505 EmitOperand(reg.LowBits() & 7, address);
4506 }
4507
4508
// subl r32, m32 — reg -= memory (2B /r).
void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);
  EmitOperand(reg.LowBits(), address);
}
4515
4516
// cdq — sign-extend EAX into EDX:EAX (99).
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}
4521
4522
// cqo — sign-extend RAX into RDX:RAX (REX.W 99).
void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();
  EmitUint8(0x99);
}
4528
4529
// idivl r32 — signed divide EDX:EAX by reg (F7 /7; 0xF8 = mod 11 with /7).
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}
4536
4537
// idivq r64 — signed divide RDX:RAX by reg (REX.W F7 /7).
void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());
}
4544
4545
// divl r32 — unsigned divide EDX:EAX by reg (F7 /6; 0xF0 = mod 11 with /6).
void X86_64Assembler::divl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}
4552
4553
// divq r64 — unsigned divide RDX:RAX by reg (REX.W F7 /6).
void X86_64Assembler::divq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());
}
4560
4561
// imull r32, r32 — two-operand signed multiply, dst *= src (0F AF /r).
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(dst.LowBits(), Operand(src));
}
4569
// imull r32, r32, imm — three-operand signed multiply, dst = src * imm.
// Uses the short 6B /r ib form when the immediate fits in a signed byte,
// otherwise the full 69 /r id form.
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}
4590
4591
// imull r32, imm — reg = reg * imm; delegates to the three-operand form.
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}
4595
4596
// imull r32, m32 — reg *= memory (0F AF /r).
void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitOperand(reg.LowBits(), address);
}
4604
4605
// imulq r64, r64 — two-operand signed multiply, dst *= src (REX.W 0F AF /r).
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4613
4614
imulq(CpuRegister reg,const Immediate & imm)4615 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
4616 imulq(reg, reg, imm);
4617 }
4618
imulq(CpuRegister dst,CpuRegister reg,const Immediate & imm)4619 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
4620 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4621 CHECK(imm.is_int32()); // imulq only supports 32b immediate.
4622
4623 EmitRex64(dst, reg);
4624
4625 // See whether imm can be represented as a sign-extended 8bit value.
4626 int64_t v64 = imm.value();
4627 if (IsInt<8>(v64)) {
4628 // Sign-extension works.
4629 EmitUint8(0x6B);
4630 EmitOperand(dst.LowBits(), Operand(reg));
4631 EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
4632 } else {
4633 // Not representable, use full immediate.
4634 EmitUint8(0x69);
4635 EmitOperand(dst.LowBits(), Operand(reg));
4636 EmitImmediate(imm);
4637 }
4638 }
4639
imulq(CpuRegister reg,const Address & address)4640 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
4641 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4642 EmitRex64(reg, address);
4643 EmitUint8(0x0F);
4644 EmitUint8(0xAF);
4645 EmitOperand(reg.LowBits(), address);
4646 }
4647
4648
imull(CpuRegister reg)4649 void X86_64Assembler::imull(CpuRegister reg) {
4650 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4651 EmitOptionalRex32(reg);
4652 EmitUint8(0xF7);
4653 EmitOperand(5, Operand(reg));
4654 }
4655
4656
imulq(CpuRegister reg)4657 void X86_64Assembler::imulq(CpuRegister reg) {
4658 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4659 EmitRex64(reg);
4660 EmitUint8(0xF7);
4661 EmitOperand(5, Operand(reg));
4662 }
4663
4664
imull(const Address & address)4665 void X86_64Assembler::imull(const Address& address) {
4666 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4667 EmitOptionalRex32(address);
4668 EmitUint8(0xF7);
4669 EmitOperand(5, address);
4670 }
4671
4672
mull(CpuRegister reg)4673 void X86_64Assembler::mull(CpuRegister reg) {
4674 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4675 EmitOptionalRex32(reg);
4676 EmitUint8(0xF7);
4677 EmitOperand(4, Operand(reg));
4678 }
4679
4680
mull(const Address & address)4681 void X86_64Assembler::mull(const Address& address) {
4682 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4683 EmitOptionalRex32(address);
4684 EmitUint8(0xF7);
4685 EmitOperand(4, address);
4686 }
4687
4688
// Shift/rotate wrappers. All delegate to EmitGenericShift; the integer
// argument is the /r opcode extension of the C1/D1/D3 group:
//   /0 = ROL, /1 = ROR, /4 = SHL, /5 = SHR, /7 = SAR.
// `wide == true` adds REX.W for the 64-bit ("q") forms; the CpuRegister
// shifter variants shift by CL (checked inside EmitGenericShift).

void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
4787
4788
// Two's-complement negate, 32-bit: F7 /3.
void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// Two's-complement negate, 64-bit: REX.W + F7 /3.
void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));
}


// Bitwise NOT, 32-bit: F7 /2 with a hand-built register-direct ModRM
// (0xD0 = 11 010 000, i.e. mod=11, /2).
void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());
}


// Bitwise NOT, 64-bit: REX.W + F7 /2 (equivalent ModRM built via EmitOperand).
void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));
}
4819
4820
// ENTER imm16, 0: C8 iw ib — allocate `imm` bytes of stack frame, nesting
// level fixed at 0 (the trailing 0x00 byte).
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);
  CHECK(imm.is_uint16()) << imm.value();
  // imm16 is emitted little-endian.
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
  EmitUint8(0x00);
}


// LEAVE: C9 — tear down the current stack frame (mov rsp, rbp; pop rbp).
void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


// RET: C3 — near return.
void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


// RET imm16: C2 iw — near return popping `imm` extra bytes of arguments.
void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
}



// NOP: 90.
void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


// INT3: CC — breakpoint trap.
void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


// HLT: F4 — halt (privileged; faults in user mode).
void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
4869
4870
// Conditional jump to a (possibly unbound) Label.
// Bound backward targets pick the short form (70+cc rel8) when the
// displacement fits, else the long form (0F 80+cc rel32). Unbound labels
// always get the long form so the 32-bit slot can hold the link chain.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 6;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);
      // Displacement is relative to the end of the 2-byte instruction.
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);
  }
}


// Conditional jump to a NearLabel: always the short 70+cc rel8 form;
// an out-of-range bound target is a hard failure.
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}


// JRCXZ: E3 rel8 — jump if RCX is zero. Short form only, hence NearLabel.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
4924
4925
// Indirect jump through a register: FF /4.
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());
}

// Indirect jump through memory: FF /4.
void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);
}

// Direct jump to a Label: EB rel8 when a bound backward target fits,
// else E9 rel32. Unbound labels always use E9 so the rel32 slot can
// carry the link chain.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    static const int kLongSize = 5;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);
  }
}


// Direct jump to a NearLabel: short EB rel8 form only.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
4975
4976
// REP MOVSW: 66 F3 A5 — copy RCX 16-bit words from [RSI] to [RDI].
// The 66 operand-size prefix must precede F3 here to select word size.
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}

// REP MOVSB: F3 A4 — copy RCX bytes from [RSI] to [RDI].
void X86_64Assembler::rep_movsb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA4);
}

// REP MOVSD (32-bit elements): F3 A5 — copy RCX dwords from [RSI] to [RDI].
void X86_64Assembler::rep_movsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA5);
}
4995
// Emits the LOCK prefix (F0) and returns `this` so callers can chain the
// locked instruction, e.g. lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}


// CMPXCHG r/m8, r8: 0F B0 /r. Uses the byte-register REX normalization so
// SPL/BPL/SIL/DIL encode correctly instead of AH/CH/DH/BH.
void X86_64Assembler::cmpxchgb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB0);
  EmitOperand(reg.LowBits(), address);
}


// CMPXCHG r/m16, r16: 66 0F B1 /r (operand-size prefix selects 16 bits).
void X86_64Assembler::cmpxchgw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// CMPXCHG r/m32, r32: 0F B1 /r. Compares EAX with the memory operand.
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// CMPXCHG r/m64, r64: REX.W + 0F B1 /r. Compares RAX with the memory operand.
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}


// MFENCE: 0F AE F0 — full memory fence.
void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}


// Emits the GS segment-override prefix (65) and returns `this` for chaining.
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
5054
5055
AddImmediate(CpuRegister reg,const Immediate & imm)5056 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
5057 int value = imm.value();
5058 if (value != 0) {
5059 if (value > 0) {
5060 addl(reg, imm);
5061 } else {
5062 subl(reg, Immediate(value));
5063 }
5064 }
5065 }
5066
5067
// SETcc r/m8: 0F 90+cc /0 — set the byte register to 0/1 from `condition`.
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    // Force an (empty) REX so the low byte of the register is selected.
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  // Register-direct ModRM with opcode extension /0.
  EmitUint8(0xC0 + dst.LowBits());
}
5078
// BMI1 BLSI (extract lowest set bit): VEX.NDD 0F38 F3 /3.
// Three-byte VEX form; `dst` is encoded in the VEX.vvvv field and `src`
// in ModRM.rm. W=1 selects the 64-bit form.
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  // Opcode extension /3 selects BLSI within the F3 group.
  EmitRegisterOperand(3, src.LowBits());
}

// BMI1 BLSMSK (mask up to lowest set bit): VEX.NDD 0F38 F3 /2.
void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  // Opcode extension /2 selects BLSMSK.
  EmitRegisterOperand(2, src.LowBits());
}

// BMI1 BLSR (reset lowest set bit): VEX.NDD 0F38 F3 /1.
void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  // Opcode extension /1 selects BLSR.
  EmitRegisterOperand(1, src.LowBits());
}
5132
// BSWAP r32: 0F C8+rd — byte-swap the 32-bit register.
void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Register index lives in the opcode byte, so only REX.B may be needed.
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}

// BSWAP r64: REX.W + 0F C8+rd — byte-swap the full 64-bit register.
void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());
}
5146
// BSF (bit scan forward, lowest set bit): 0F BC /r; 32-bit reg form.
void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSF, 32-bit memory-source form.
void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// BSF, 64-bit register form: REX.W + 0F BC /r.
void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSF, 64-bit memory-source form.
void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}

// BSR (bit scan reverse, highest set bit): 0F BD /r; 32-bit reg form.
void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSR, 32-bit memory-source form.
void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

// BSR, 64-bit register form: REX.W + 0F BD /r.
void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// BSR, 64-bit memory-source form.
void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}

// POPCNT r32, r/m32: F3 0F B8 /r. The F3 prefix must precede any REX byte.
void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// POPCNT, 32-bit memory-source form.
void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}

// POPCNT r64, r/m64: F3 REX.W 0F B8 /r.
void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}

// POPCNT, 64-bit memory-source form.
void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5246
// REPNE SCASB: F2 AE — scan bytes at [RDI] for AL, up to RCX iterations.
void X86_64Assembler::repne_scasb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitUint8(0xAE);
}

// REPNE SCASW: 66 F2 AF — scan 16-bit words at [RDI] for AX.
void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF2);
  EmitUint8(0xAF);
}

// REPE CMPSW: 66 F3 A7 — compare 16-bit words at [RSI]/[RDI] while equal.
void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// REPE CMPSD (32-bit elements): F3 A7.
void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}


// REPE CMPSQ: F3 REX.W A7 — 64-bit element compare; REX.W goes after F3.
void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}

// UD2: 0F 0B — guaranteed undefined-instruction trap.
void X86_64Assembler::ud2() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x0B);
}
5287
// Materializes a double constant into `dst` by pushing its bit pattern on
// the stack and loading it back with movsd.
// NOTE(review): each pushq of an imm32 pushes a sign-extended 8-byte slot,
// so the two halves land 16 bytes apart-worth of stack (hence the
// 2 * sizeof(intptr_t) cleanup); verify the movsd at [RSP] actually sees
// the intended 8-byte pattern for doubles with a nonzero high word —
// TODO confirm against callers / consider the constant-area path instead.
void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
  // TODO: Need to have a code constants table.
  int64_t constant = bit_cast<int64_t, double>(value);
  pushq(Immediate(High32Bits(constant)));
  pushq(Immediate(Low32Bits(constant)));
  movsd(dst, Address(CpuRegister(RSP), 0));
  addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
}
5296
5297
// Pads with single-byte NOPs until (offset + current position) is a
// multiple of `alignment` (which must be a power of two).
void X86_64Assembler::Align(int alignment, int offset) {
  CHECK(IsPowerOfTwo(alignment));
  // Emit nop instruction until the real position is aligned.
  while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
    nop();
  }
}
5305
5306
// Binds `label` to the current buffer position and back-patches every
// forward reference. Unresolved sites form a linked list threaded through
// the 32-bit displacement slots themselves: each slot holds the position of
// the next link until it is patched with the real rel32.
void X86_64Assembler::Bind(Label* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    int next = buffer_.Load<int32_t>(position);
    // rel32 is relative to the end of the 4-byte displacement field.
    buffer_.Store<int32_t>(position, bound - (position + 4));
    label->position_ = next;
  }
  label->BindTo(bound);
}


// NearLabel variant: the link chain lives in single displacement bytes, so
// each link stores the (uint8) delta back to the previous link instead of
// an absolute position; 0 terminates the chain.
void X86_64Assembler::Bind(NearLabel* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);
    int offset = bound - (position + 1);
    CHECK(IsInt<8>(offset));  // Near jumps must stay within rel8 range.
    buffer_.Store<int8_t>(position, offset);
    // Walk to the previous link, or stop if this was the last one.
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
5333
5334
// Emits a pre-encoded operand (ModRM + optional SIB + displacement),
// merging `reg_or_opcode` (a register number or /n opcode extension)
// into the reg field of the ModRM byte.
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // reg field must start empty.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  // Displacements that reference labels/thunks carry a fixup to patch later.
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);
  }
}
5352
5353
// Emits an immediate in little-endian order: 2 bytes for 16-bit operand
// forms, 4 bytes when the value fits in int32, otherwise the full 8 bytes
// (only meaningful for instructions that accept imm64).
void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
  if (is_16_op) {
    EmitUint8(imm.value() & 0xFF);
    EmitUint8(imm.value() >> 8);
  } else if (imm.is_int32()) {
    EmitInt32(static_cast<int32_t>(imm.value()));
  } else {
    EmitInt64(imm.value());
  }
}
5364
5365
// Emits an ALU-group instruction (add/or/adc/sbb/and/sub/xor/cmp, selected
// by the /n extension `reg_or_opcode`) with an immediate, choosing the
// shortest encoding: 83 /n ib for int8 immediates, the AX-short form
// (op 05+) when the target is RAX, else the general 81 /n form.
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
5387
5388
// Emits a rel32 displacement to `label`: the real offset if already bound
// (adjusted for the full instruction length), otherwise a link-chain entry.
void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}


// Appends this site to the label's forward-reference chain: the 4-byte
// slot temporarily stores the previous link position (see Bind(Label*)).
void X86_64Assembler::EmitLabelLink(Label* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}


// NearLabel chain entry: a single byte holding the delta back to the
// previous link (0 marks the first/terminal link); see Bind(NearLabel*).
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);
  }
  label->LinkTo(position);
}
5421
5422
// Shift/rotate by immediate. `reg_or_opcode` is the /n extension selecting
// the operation (ROL /0 ... SAR /7); `wide` adds REX.W for 64-bit forms.
// Shift-by-1 gets the dedicated D1 /n encoding; otherwise C1 /n ib.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    EmitUint8(0xD1);
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}


// Shift/rotate by CL: D3 /n. The hardware only shifts by CL, so the
// shifter register is asserted to be RCX.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}
5459
// Builds and emits a REX prefix (0x40 | W R X B). Nothing is emitted when
// no bit is set and `force` is false; `force` emits a bare 0x40, which is
// needed e.g. to address SPL/BPL/SIL/DIL as byte registers.
void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r) {
    rex |= 0x44;  // REX.0R00
  }
  if (x) {
    rex |= 0x42;  // REX.00X0
  }
  if (b) {
    rex |= 0x41;  // REX.000B
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5483
// EmitOptionalRex32 overloads: emit a REX prefix only when one of the
// operands is an extended register (R8-R15 / XMM8-XMM15) or the memory
// operand already requires REX bits. W is never set (32-bit operand size).

// Single register in ModRM.rm -> REX.B.
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}

// dst in ModRM.reg (REX.R), src in ModRM.rm (REX.B).
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// Memory operand only: the Operand pre-computes its own REX.X/REX.B bits.
void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// Register + memory operand: merge the operand's bits with REX.R for dst.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5530
// EmitRex64 overloads: always emit a REX prefix with W=1 (64-bit operand
// size), plus R/B bits for any extended registers involved.

void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}

void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}

// Memory operand: OR REX.W into the operand's pre-computed REX bits.
void X86_64Assembler::EmitRex64(const Operand& operand) {
  uint8_t rex = operand.rex();
  rex |= 0x48;  // REX.W000
  EmitUint8(rex);
}

void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

// Register + memory operand: REX.W | operand bits | REX.R for dst.
void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}

void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}
5572
EmitOptionalByteRegNormalizingRex32(CpuRegister dst,CpuRegister src,bool normalize_both)5573 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
5574 CpuRegister src,
5575 bool normalize_both) {
5576 // SPL, BPL, SIL, DIL need the REX prefix.
5577 bool force = src.AsRegister() > 3;
5578 if (normalize_both) {
5579 // Some instructions take two byte registers, such as `xchg bpl, al`, so they need the REX
5580 // prefix if either `src` or `dst` needs it.
5581 force |= dst.AsRegister() > 3;
5582 } else {
5583 // Other instructions take one byte register and one full register, such as `movzxb rax, bpl`.
5584 // They need REX prefix only if `src` needs it, but not `dst`.
5585 }
5586 EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
5587 }
5588
EmitOptionalByteRegNormalizingRex32(CpuRegister dst,const Operand & operand)5589 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
5590 uint8_t rex = operand.rex();
5591 // For dst, SPL, BPL, SIL, DIL need the rex prefix.
5592 bool force = dst.AsRegister() > 3;
5593 if (force) {
5594 rex |= 0x40; // REX.0000
5595 }
5596 if (dst.NeedsRex()) {
5597 rex |= 0x44; // REX.0R00
5598 }
5599 if (rex != 0) {
5600 EmitUint8(rex);
5601 }
5602 }
5603
AddConstantArea()5604 void X86_64Assembler::AddConstantArea() {
5605 ArrayRef<const int32_t> area = constant_area_.GetBuffer();
5606 for (size_t i = 0, e = area.size(); i < e; i++) {
5607 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5608 EmitInt32(area[i]);
5609 }
5610 }
5611
AppendInt32(int32_t v)5612 size_t ConstantArea::AppendInt32(int32_t v) {
5613 size_t result = buffer_.size() * elem_size_;
5614 buffer_.push_back(v);
5615 return result;
5616 }
5617
AddInt32(int32_t v)5618 size_t ConstantArea::AddInt32(int32_t v) {
5619 // Look for an existing match.
5620 for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5621 if (v == buffer_[i]) {
5622 return i * elem_size_;
5623 }
5624 }
5625
5626 // Didn't match anything.
5627 return AppendInt32(v);
5628 }
5629
AddInt64(int64_t v)5630 size_t ConstantArea::AddInt64(int64_t v) {
5631 int32_t v_low = v;
5632 int32_t v_high = v >> 32;
5633 if (buffer_.size() > 1) {
5634 // Ensure we don't pass the end of the buffer.
5635 for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
5636 if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
5637 return i * elem_size_;
5638 }
5639 }
5640 }
5641
5642 // Didn't match anything.
5643 size_t result = buffer_.size() * elem_size_;
5644 buffer_.push_back(v_low);
5645 buffer_.push_back(v_high);
5646 return result;
5647 }
5648
AddDouble(double v)5649 size_t ConstantArea::AddDouble(double v) {
5650 // Treat the value as a 64-bit integer value.
5651 return AddInt64(bit_cast<int64_t, double>(v));
5652 }
5653
AddFloat(float v)5654 size_t ConstantArea::AddFloat(float v) {
5655 // Treat the value as a 32-bit integer value.
5656 return AddInt32(bit_cast<int32_t, float>(v));
5657 }
5658
EmitVexPrefixByteZero(bool is_twobyte_form)5659 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5660 // Vex Byte 0,
5661 // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5662 // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5663 uint8_t vex_prefix = 0xC0;
5664 if (is_twobyte_form) {
5665 vex_prefix |= TWO_BYTE_VEX; // 2-Byte Vex
5666 } else {
5667 vex_prefix |= THREE_BYTE_VEX; // 3-Byte Vex
5668 }
5669 return vex_prefix;
5670 }
5671
EmitVexPrefixByteOne(bool R,bool X,bool B,int SET_VEX_M)5672 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
5673 // Vex Byte 1,
5674 uint8_t vex_prefix = VEX_INIT;
5675 /** Bit[7] This bit needs to be set to '1'
5676 otherwise the instruction is LES or LDS */
5677 if (!R) {
5678 // R .
5679 vex_prefix |= SET_VEX_R;
5680 }
5681 /** Bit[6] This bit needs to be set to '1'
5682 otherwise the instruction is LES or LDS */
5683 if (!X) {
5684 // X .
5685 vex_prefix |= SET_VEX_X;
5686 }
5687 /** Bit[5] This bit needs to be set to '1' */
5688 if (!B) {
5689 // B .
5690 vex_prefix |= SET_VEX_B;
5691 }
5692 /** Bits[4:0], Based on the instruction documentaion */
5693 vex_prefix |= SET_VEX_M;
5694 return vex_prefix;
5695 }
5696
EmitVexPrefixByteOne(bool R,X86_64ManagedRegister operand,int SET_VEX_L,int SET_VEX_PP)5697 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
5698 X86_64ManagedRegister operand,
5699 int SET_VEX_L,
5700 int SET_VEX_PP) {
5701 // Vex Byte 1,
5702 uint8_t vex_prefix = VEX_INIT;
5703 /** Bit[7] This bit needs to be set to '1'
5704 otherwise the instruction is LES or LDS */
5705 if (!R) {
5706 // R .
5707 vex_prefix |= SET_VEX_R;
5708 }
5709 /**Bits[6:3] - 'vvvv' the source or dest register specifier */
5710 if (operand.IsNoRegister()) {
5711 vex_prefix |= 0x78;
5712 } else if (operand.IsXmmRegister()) {
5713 XmmRegister vvvv = operand.AsXmmRegister();
5714 int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
5715 uint8_t reg = static_cast<uint8_t>(inverted_reg);
5716 vex_prefix |= ((reg & 0x0F) << 3);
5717 } else if (operand.IsCpuRegister()) {
5718 CpuRegister vvvv = operand.AsCpuRegister();
5719 int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
5720 uint8_t reg = static_cast<uint8_t>(inverted_reg);
5721 vex_prefix |= ((reg & 0x0F) << 3);
5722 }
5723 /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5724 VEX.L = 0 indicates 128 bit vector operation */
5725 vex_prefix |= SET_VEX_L;
5726 // Bits[1:0] - "pp"
5727 vex_prefix |= SET_VEX_PP;
5728 return vex_prefix;
5729 }
5730
EmitVexPrefixByteTwo(bool W,X86_64ManagedRegister operand,int SET_VEX_L,int SET_VEX_PP)5731 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5732 X86_64ManagedRegister operand,
5733 int SET_VEX_L,
5734 int SET_VEX_PP) {
5735 // Vex Byte 2,
5736 uint8_t vex_prefix = VEX_INIT;
5737
5738 /** Bit[7] This bits needs to be set to '1' with default value.
5739 When using C4H form of VEX prefix, REX.W value is ignored */
5740 if (W) {
5741 vex_prefix |= SET_VEX_W;
5742 }
5743 // Bits[6:3] - 'vvvv' the source or dest register specifier
5744 if (operand.IsXmmRegister()) {
5745 XmmRegister vvvv = operand.AsXmmRegister();
5746 int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
5747 uint8_t reg = static_cast<uint8_t>(inverted_reg);
5748 vex_prefix |= ((reg & 0x0F) << 3);
5749 } else if (operand.IsCpuRegister()) {
5750 CpuRegister vvvv = operand.AsCpuRegister();
5751 int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
5752 uint8_t reg = static_cast<uint8_t>(inverted_reg);
5753 vex_prefix |= ((reg & 0x0F) << 3);
5754 }
5755 /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5756 VEX.L = 0 indicates 128 bit vector operation */
5757 vex_prefix |= SET_VEX_L;
5758 // Bits[1:0] - "pp"
5759 vex_prefix |= SET_VEX_PP;
5760 return vex_prefix;
5761 }
5762
EmitVexPrefixByteTwo(bool W,int SET_VEX_L,int SET_VEX_PP)5763 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5764 int SET_VEX_L,
5765 int SET_VEX_PP) {
5766 // Vex Byte 2,
5767 uint8_t vex_prefix = VEX_INIT;
5768
5769 /** Bit[7] This bits needs to be set to '1' with default value.
5770 When using C4H form of VEX prefix, REX.W value is ignored */
5771 if (W) {
5772 vex_prefix |= SET_VEX_W;
5773 }
5774 /** Bits[6:3] - 'vvvv' the source or dest register specifier */
5775 vex_prefix |= (0x0F << 3);
5776 /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5777 VEX.L = 0 indicates 128 bit vector operation */
5778 vex_prefix |= SET_VEX_L;
5779
5780 // Bits[1:0] - "pp"
5781 if (SET_VEX_PP != SET_VEX_PP_NONE) {
5782 vex_prefix |= SET_VEX_PP;
5783 }
5784 return vex_prefix;
5785 }
5786
5787 } // namespace x86_64
5788 } // namespace art
5789