1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stddef.h>
6 #include <stdint.h>
7
8 #include <limits>
9 #include <type_traits>
10 #include <vector>
11
12 #include "src/base/bits.h"
13 #include "src/base/flags.h"
14 #include "src/base/iterator.h"
15 #include "src/base/logging.h"
16 #include "src/base/macros.h"
17 #include "src/base/platform/wrappers.h"
18 #include "src/codegen/cpu-features.h"
19 #include "src/codegen/ia32/assembler-ia32.h"
20 #include "src/codegen/ia32/register-ia32.h"
21 #include "src/codegen/machine-type.h"
22 #include "src/codegen/turbo-assembler.h"
23 #include "src/common/globals.h"
24 #include "src/compiler/backend/instruction-codes.h"
25 #include "src/compiler/backend/instruction-selector-impl.h"
26 #include "src/compiler/backend/instruction-selector.h"
27 #include "src/compiler/backend/instruction.h"
28 #include "src/compiler/common-operator.h"
29 #include "src/compiler/frame.h"
30 #include "src/compiler/globals.h"
31 #include "src/compiler/linkage.h"
32 #include "src/compiler/machine-operator.h"
33 #include "src/compiler/node-matchers.h"
34 #include "src/compiler/node-properties.h"
35 #include "src/compiler/node.h"
36 #include "src/compiler/opcodes.h"
37 #include "src/compiler/operator.h"
38 #include "src/compiler/write-barrier-kind.h"
39 #include "src/flags/flags.h"
40 #include "src/utils/utils.h"
41 #include "src/zone/zone-containers.h"
42
43 #if V8_ENABLE_WEBASSEMBLY
44 #include "src/wasm/simd-shuffle.h"
45 #endif // V8_ENABLE_WEBASSEMBLY
46
47 namespace v8 {
48 namespace internal {
49 namespace compiler {
50
51 // Adds IA32-specific methods for generating operands.
52 class IA32OperandGenerator final : public OperandGenerator {
53 public:
IA32OperandGenerator(InstructionSelector * selector)54 explicit IA32OperandGenerator(InstructionSelector* selector)
55 : OperandGenerator(selector) {}
56
UseByteRegister(Node * node)57 InstructionOperand UseByteRegister(Node* node) {
58 // TODO(titzer): encode byte register use constraints.
59 return UseFixed(node, edx);
60 }
61
DefineAsByteRegister(Node * node)62 InstructionOperand DefineAsByteRegister(Node* node) {
63 // TODO(titzer): encode byte register def constraints.
64 return DefineAsRegister(node);
65 }
66
CanBeMemoryOperand(InstructionCode opcode,Node * node,Node * input,int effect_level)67 bool CanBeMemoryOperand(InstructionCode opcode, Node* node, Node* input,
68 int effect_level) {
69 if ((input->opcode() != IrOpcode::kLoad &&
70 input->opcode() != IrOpcode::kLoadImmutable) ||
71 !selector()->CanCover(node, input)) {
72 return false;
73 }
74 if (effect_level != selector()->GetEffectLevel(input)) {
75 return false;
76 }
77 MachineRepresentation rep =
78 LoadRepresentationOf(input->op()).representation();
79 switch (opcode) {
80 case kIA32And:
81 case kIA32Or:
82 case kIA32Xor:
83 case kIA32Add:
84 case kIA32Sub:
85 case kIA32Cmp:
86 case kIA32Test:
87 return rep == MachineRepresentation::kWord32 || IsAnyTagged(rep);
88 case kIA32Cmp16:
89 case kIA32Test16:
90 return rep == MachineRepresentation::kWord16;
91 case kIA32Cmp8:
92 case kIA32Test8:
93 return rep == MachineRepresentation::kWord8;
94 default:
95 break;
96 }
97 return false;
98 }
99
CanBeImmediate(Node * node)100 bool CanBeImmediate(Node* node) {
101 switch (node->opcode()) {
102 case IrOpcode::kInt32Constant:
103 case IrOpcode::kExternalConstant:
104 case IrOpcode::kRelocatableInt32Constant:
105 case IrOpcode::kRelocatableInt64Constant:
106 return true;
107 case IrOpcode::kNumberConstant: {
108 const double value = OpParameter<double>(node->op());
109 return bit_cast<int64_t>(value) == 0;
110 }
111 case IrOpcode::kHeapConstant: {
112 // TODO(bmeurer): We must not dereference handles concurrently. If we
113 // really have to this here, then we need to find a way to put this
114 // information on the HeapConstant node already.
115 #if 0
116 // Constants in young generation cannot be used as immediates in V8
117 // because the GC does not scan code objects when collecting the young
118 // generation.
119 Handle<HeapObject> value = HeapConstantOf(node->op());
120 return !Heap::InYoungGeneration(*value);
121 #else
122 return false;
123 #endif
124 }
125 default:
126 return false;
127 }
128 }
129
GenerateMemoryOperandInputs(Node * index,int scale,Node * base,int32_t displacement,DisplacementMode displacement_mode,InstructionOperand inputs[],size_t * input_count,RegisterMode register_mode=kRegister)130 AddressingMode GenerateMemoryOperandInputs(
131 Node* index, int scale, Node* base, int32_t displacement,
132 DisplacementMode displacement_mode, InstructionOperand inputs[],
133 size_t* input_count, RegisterMode register_mode = kRegister) {
134 AddressingMode mode = kMode_MRI;
135 if (displacement_mode == kNegativeDisplacement) {
136 displacement = base::bits::WraparoundNeg32(displacement);
137 }
138 if (base != nullptr) {
139 if (base->opcode() == IrOpcode::kInt32Constant) {
140 displacement = base::bits::WraparoundAdd32(
141 displacement, OpParameter<int32_t>(base->op()));
142 base = nullptr;
143 }
144 }
145 if (base != nullptr) {
146 inputs[(*input_count)++] = UseRegisterWithMode(base, register_mode);
147 if (index != nullptr) {
148 DCHECK(scale >= 0 && scale <= 3);
149 inputs[(*input_count)++] = UseRegisterWithMode(index, register_mode);
150 if (displacement != 0) {
151 inputs[(*input_count)++] = TempImmediate(displacement);
152 static const AddressingMode kMRnI_modes[] = {kMode_MR1I, kMode_MR2I,
153 kMode_MR4I, kMode_MR8I};
154 mode = kMRnI_modes[scale];
155 } else {
156 static const AddressingMode kMRn_modes[] = {kMode_MR1, kMode_MR2,
157 kMode_MR4, kMode_MR8};
158 mode = kMRn_modes[scale];
159 }
160 } else {
161 if (displacement == 0) {
162 mode = kMode_MR;
163 } else {
164 inputs[(*input_count)++] = TempImmediate(displacement);
165 mode = kMode_MRI;
166 }
167 }
168 } else {
169 DCHECK(scale >= 0 && scale <= 3);
170 if (index != nullptr) {
171 inputs[(*input_count)++] = UseRegisterWithMode(index, register_mode);
172 if (displacement != 0) {
173 inputs[(*input_count)++] = TempImmediate(displacement);
174 static const AddressingMode kMnI_modes[] = {kMode_MRI, kMode_M2I,
175 kMode_M4I, kMode_M8I};
176 mode = kMnI_modes[scale];
177 } else {
178 static const AddressingMode kMn_modes[] = {kMode_MR, kMode_M2,
179 kMode_M4, kMode_M8};
180 mode = kMn_modes[scale];
181 }
182 } else {
183 inputs[(*input_count)++] = TempImmediate(displacement);
184 return kMode_MI;
185 }
186 }
187 return mode;
188 }
189
GenerateMemoryOperandInputs(Node * index,int scale,Node * base,Node * displacement_node,DisplacementMode displacement_mode,InstructionOperand inputs[],size_t * input_count,RegisterMode register_mode=kRegister)190 AddressingMode GenerateMemoryOperandInputs(
191 Node* index, int scale, Node* base, Node* displacement_node,
192 DisplacementMode displacement_mode, InstructionOperand inputs[],
193 size_t* input_count, RegisterMode register_mode = kRegister) {
194 int32_t displacement = (displacement_node == nullptr)
195 ? 0
196 : OpParameter<int32_t>(displacement_node->op());
197 return GenerateMemoryOperandInputs(index, scale, base, displacement,
198 displacement_mode, inputs, input_count,
199 register_mode);
200 }
201
GetEffectiveAddressMemoryOperand(Node * node,InstructionOperand inputs[],size_t * input_count,RegisterMode register_mode=kRegister)202 AddressingMode GetEffectiveAddressMemoryOperand(
203 Node* node, InstructionOperand inputs[], size_t* input_count,
204 RegisterMode register_mode = kRegister) {
205 {
206 LoadMatcher<ExternalReferenceMatcher> m(node);
207 if (m.index().HasResolvedValue() && m.object().HasResolvedValue() &&
208 selector()->CanAddressRelativeToRootsRegister(
209 m.object().ResolvedValue())) {
210 ptrdiff_t const delta =
211 m.index().ResolvedValue() +
212 TurboAssemblerBase::RootRegisterOffsetForExternalReference(
213 selector()->isolate(), m.object().ResolvedValue());
214 if (is_int32(delta)) {
215 inputs[(*input_count)++] = TempImmediate(static_cast<int32_t>(delta));
216 return kMode_Root;
217 }
218 }
219 }
220
221 BaseWithIndexAndDisplacement32Matcher m(node, AddressOption::kAllowAll);
222 DCHECK(m.matches());
223 if ((m.displacement() == nullptr || CanBeImmediate(m.displacement()))) {
224 return GenerateMemoryOperandInputs(
225 m.index(), m.scale(), m.base(), m.displacement(),
226 m.displacement_mode(), inputs, input_count, register_mode);
227 } else {
228 inputs[(*input_count)++] =
229 UseRegisterWithMode(node->InputAt(0), register_mode);
230 inputs[(*input_count)++] =
231 UseRegisterWithMode(node->InputAt(1), register_mode);
232 return kMode_MR1;
233 }
234 }
235
GetEffectiveIndexOperand(Node * index,AddressingMode * mode)236 InstructionOperand GetEffectiveIndexOperand(Node* index,
237 AddressingMode* mode) {
238 if (CanBeImmediate(index)) {
239 *mode = kMode_MRI;
240 return UseImmediate(index);
241 } else {
242 *mode = kMode_MR1;
243 return UseUniqueRegister(index);
244 }
245 }
246
CanBeBetterLeftOperand(Node * node) const247 bool CanBeBetterLeftOperand(Node* node) const {
248 return !selector()->IsLive(node);
249 }
250 };
251
252 namespace {
253
GetLoadOpcode(LoadRepresentation load_rep)254 ArchOpcode GetLoadOpcode(LoadRepresentation load_rep) {
255 ArchOpcode opcode;
256 switch (load_rep.representation()) {
257 case MachineRepresentation::kFloat32:
258 opcode = kIA32Movss;
259 break;
260 case MachineRepresentation::kFloat64:
261 opcode = kIA32Movsd;
262 break;
263 case MachineRepresentation::kBit: // Fall through.
264 case MachineRepresentation::kWord8:
265 opcode = load_rep.IsSigned() ? kIA32Movsxbl : kIA32Movzxbl;
266 break;
267 case MachineRepresentation::kWord16:
268 opcode = load_rep.IsSigned() ? kIA32Movsxwl : kIA32Movzxwl;
269 break;
270 case MachineRepresentation::kTaggedSigned: // Fall through.
271 case MachineRepresentation::kTaggedPointer: // Fall through.
272 case MachineRepresentation::kTagged: // Fall through.
273 case MachineRepresentation::kWord32:
274 opcode = kIA32Movl;
275 break;
276 case MachineRepresentation::kSimd128:
277 opcode = kIA32Movdqu;
278 break;
279 case MachineRepresentation::kCompressedPointer: // Fall through.
280 case MachineRepresentation::kCompressed: // Fall through.
281 case MachineRepresentation::kSandboxedPointer: // Fall through.
282 case MachineRepresentation::kWord64: // Fall through.
283 case MachineRepresentation::kMapWord: // Fall through.
284 case MachineRepresentation::kNone:
285 UNREACHABLE();
286 }
287 return opcode;
288 }
289
VisitRO(InstructionSelector * selector,Node * node,ArchOpcode opcode)290 void VisitRO(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
291 IA32OperandGenerator g(selector);
292 Node* input = node->InputAt(0);
293 // We have to use a byte register as input to movsxb.
294 InstructionOperand input_op =
295 opcode == kIA32Movsxbl ? g.UseFixed(input, eax) : g.Use(input);
296 selector->Emit(opcode, g.DefineAsRegister(node), input_op);
297 }
298
VisitROWithTemp(InstructionSelector * selector,Node * node,ArchOpcode opcode)299 void VisitROWithTemp(InstructionSelector* selector, Node* node,
300 ArchOpcode opcode) {
301 IA32OperandGenerator g(selector);
302 InstructionOperand temps[] = {g.TempRegister()};
303 selector->Emit(opcode, g.DefineAsRegister(node), g.Use(node->InputAt(0)),
304 arraysize(temps), temps);
305 }
306
VisitROWithTempSimd(InstructionSelector * selector,Node * node,ArchOpcode opcode)307 void VisitROWithTempSimd(InstructionSelector* selector, Node* node,
308 ArchOpcode opcode) {
309 IA32OperandGenerator g(selector);
310 InstructionOperand temps[] = {g.TempSimd128Register()};
311 selector->Emit(opcode, g.DefineAsRegister(node),
312 g.UseUniqueRegister(node->InputAt(0)), arraysize(temps),
313 temps);
314 }
315
VisitRR(InstructionSelector * selector,Node * node,InstructionCode opcode)316 void VisitRR(InstructionSelector* selector, Node* node,
317 InstructionCode opcode) {
318 IA32OperandGenerator g(selector);
319 selector->Emit(opcode, g.DefineAsRegister(node),
320 g.UseRegister(node->InputAt(0)));
321 }
322
VisitRROFloat(InstructionSelector * selector,Node * node,ArchOpcode opcode)323 void VisitRROFloat(InstructionSelector* selector, Node* node,
324 ArchOpcode opcode) {
325 IA32OperandGenerator g(selector);
326 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
327 InstructionOperand operand1 = g.Use(node->InputAt(1));
328 if (selector->IsSupported(AVX)) {
329 selector->Emit(opcode, g.DefineAsRegister(node), operand0, operand1);
330 } else {
331 selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1);
332 }
333 }
334
335 // For float unary operations. Also allocates a temporary general register for
336 // used in external operands. If a temp is not required, use VisitRRSimd (since
337 // float and SIMD registers are the same on IA32.
VisitFloatUnop(InstructionSelector * selector,Node * node,Node * input,ArchOpcode opcode)338 void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
339 ArchOpcode opcode) {
340 IA32OperandGenerator g(selector);
341 InstructionOperand temps[] = {g.TempRegister()};
342 // No need for unique because inputs are float but temp is general.
343 if (selector->IsSupported(AVX)) {
344 selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(input),
345 arraysize(temps), temps);
346 } else {
347 selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(input),
348 arraysize(temps), temps);
349 }
350 }
351
VisitRRSimd(InstructionSelector * selector,Node * node,ArchOpcode avx_opcode,ArchOpcode sse_opcode)352 void VisitRRSimd(InstructionSelector* selector, Node* node,
353 ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
354 IA32OperandGenerator g(selector);
355 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
356 if (selector->IsSupported(AVX)) {
357 selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0);
358 } else {
359 selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0);
360 }
361 }
362
VisitRRSimd(InstructionSelector * selector,Node * node,ArchOpcode opcode)363 void VisitRRSimd(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
364 VisitRRSimd(selector, node, opcode, opcode);
365 }
366
367 // TODO(v8:9198): Like VisitRROFloat, but for SIMD. SSE requires operand1 to be
368 // a register as we don't have memory alignment yet. For AVX, memory operands
369 // are fine, but can have performance issues if not aligned to 16/32 bytes
370 // (based on load size), see SDM Vol 1, chapter 14.9
VisitRROSimd(InstructionSelector * selector,Node * node,ArchOpcode avx_opcode,ArchOpcode sse_opcode)371 void VisitRROSimd(InstructionSelector* selector, Node* node,
372 ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
373 IA32OperandGenerator g(selector);
374 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
375 if (selector->IsSupported(AVX)) {
376 selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0,
377 g.UseRegister(node->InputAt(1)));
378 } else {
379 selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0,
380 g.UseRegister(node->InputAt(1)));
381 }
382 }
VisitRRRSimd(InstructionSelector * selector,Node * node,ArchOpcode opcode)383 void VisitRRRSimd(InstructionSelector* selector, Node* node,
384 ArchOpcode opcode) {
385 IA32OperandGenerator g(selector);
386 InstructionOperand dst = selector->IsSupported(AVX)
387 ? g.DefineAsRegister(node)
388 : g.DefineSameAsFirst(node);
389 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
390 InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
391 selector->Emit(opcode, dst, operand0, operand1);
392 }
393
VisitRRISimd(InstructionSelector * selector,Node * node,ArchOpcode opcode)394 void VisitRRISimd(InstructionSelector* selector, Node* node,
395 ArchOpcode opcode) {
396 IA32OperandGenerator g(selector);
397 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
398 InstructionOperand operand1 =
399 g.UseImmediate(OpParameter<int32_t>(node->op()));
400 // 8x16 uses movsx_b on dest to extract a byte, which only works
401 // if dest is a byte register.
402 InstructionOperand dest = opcode == kIA32I8x16ExtractLaneS
403 ? g.DefineAsFixed(node, eax)
404 : g.DefineAsRegister(node);
405 selector->Emit(opcode, dest, operand0, operand1);
406 }
407
VisitRRISimd(InstructionSelector * selector,Node * node,ArchOpcode avx_opcode,ArchOpcode sse_opcode)408 void VisitRRISimd(InstructionSelector* selector, Node* node,
409 ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
410 IA32OperandGenerator g(selector);
411 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
412 InstructionOperand operand1 =
413 g.UseImmediate(OpParameter<int32_t>(node->op()));
414 if (selector->IsSupported(AVX)) {
415 selector->Emit(avx_opcode, g.DefineAsRegister(node), operand0, operand1);
416 } else {
417 selector->Emit(sse_opcode, g.DefineSameAsFirst(node), operand0, operand1);
418 }
419 }
420
VisitRROSimdShift(InstructionSelector * selector,Node * node,ArchOpcode opcode)421 void VisitRROSimdShift(InstructionSelector* selector, Node* node,
422 ArchOpcode opcode) {
423 IA32OperandGenerator g(selector);
424 if (g.CanBeImmediate(node->InputAt(1))) {
425 selector->Emit(opcode, g.DefineSameAsFirst(node),
426 g.UseRegister(node->InputAt(0)),
427 g.UseImmediate(node->InputAt(1)));
428 } else {
429 InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
430 InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
431 InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
432 selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
433 arraysize(temps), temps);
434 }
435 }
436
VisitRRRR(InstructionSelector * selector,Node * node,InstructionCode opcode)437 void VisitRRRR(InstructionSelector* selector, Node* node,
438 InstructionCode opcode) {
439 IA32OperandGenerator g(selector);
440 selector->Emit(
441 opcode, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
442 g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2)));
443 }
444
VisitI8x16Shift(InstructionSelector * selector,Node * node,ArchOpcode opcode)445 void VisitI8x16Shift(InstructionSelector* selector, Node* node,
446 ArchOpcode opcode) {
447 IA32OperandGenerator g(selector);
448 InstructionOperand output = CpuFeatures::IsSupported(AVX)
449 ? g.UseRegister(node)
450 : g.DefineSameAsFirst(node);
451
452 if (g.CanBeImmediate(node->InputAt(1))) {
453 if (opcode == kIA32I8x16ShrS) {
454 selector->Emit(opcode, output, g.UseRegister(node->InputAt(0)),
455 g.UseImmediate(node->InputAt(1)));
456 } else {
457 InstructionOperand temps[] = {g.TempRegister()};
458 selector->Emit(opcode, output, g.UseRegister(node->InputAt(0)),
459 g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
460 }
461 } else {
462 InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
463 InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
464 InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
465 selector->Emit(opcode, output, operand0, operand1, arraysize(temps), temps);
466 }
467 }
468 } // namespace
469
VisitStackSlot(Node * node)470 void InstructionSelector::VisitStackSlot(Node* node) {
471 StackSlotRepresentation rep = StackSlotRepresentationOf(node->op());
472 int slot = frame_->AllocateSpillSlot(rep.size(), rep.alignment());
473 OperandGenerator g(this);
474
475 Emit(kArchStackSlot, g.DefineAsRegister(node),
476 sequence()->AddImmediate(Constant(slot)), 0, nullptr);
477 }
478
VisitAbortCSADcheck(Node * node)479 void InstructionSelector::VisitAbortCSADcheck(Node* node) {
480 IA32OperandGenerator g(this);
481 Emit(kArchAbortCSADcheck, g.NoOutput(), g.UseFixed(node->InputAt(0), edx));
482 }
483
VisitLoadLane(Node * node)484 void InstructionSelector::VisitLoadLane(Node* node) {
485 LoadLaneParameters params = LoadLaneParametersOf(node->op());
486 InstructionCode opcode = kArchNop;
487 if (params.rep == MachineType::Int8()) {
488 opcode = kIA32Pinsrb;
489 } else if (params.rep == MachineType::Int16()) {
490 opcode = kIA32Pinsrw;
491 } else if (params.rep == MachineType::Int32()) {
492 opcode = kIA32Pinsrd;
493 } else if (params.rep == MachineType::Int64()) {
494 // pinsrq not available on IA32.
495 if (params.laneidx == 0) {
496 opcode = kIA32Movlps;
497 } else {
498 DCHECK_EQ(1, params.laneidx);
499 opcode = kIA32Movhps;
500 }
501 } else {
502 UNREACHABLE();
503 }
504
505 IA32OperandGenerator g(this);
506 InstructionOperand outputs[] = {IsSupported(AVX) ? g.DefineAsRegister(node)
507 : g.DefineSameAsFirst(node)};
508 // Input 0 is value node, 1 is lane idx, and GetEffectiveAddressMemoryOperand
509 // uses up to 3 inputs. This ordering is consistent with other operations that
510 // use the same opcode.
511 InstructionOperand inputs[5];
512 size_t input_count = 0;
513
514 inputs[input_count++] = g.UseRegister(node->InputAt(2));
515 inputs[input_count++] = g.UseImmediate(params.laneidx);
516
517 AddressingMode mode =
518 g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
519 opcode |= AddressingModeField::encode(mode);
520
521 DCHECK_GE(5, input_count);
522
523 // IA32 supports unaligned loads.
524 DCHECK_NE(params.kind, MemoryAccessKind::kUnaligned);
525 // Trap handler is not supported on IA32.
526 DCHECK_NE(params.kind, MemoryAccessKind::kProtected);
527
528 Emit(opcode, 1, outputs, input_count, inputs);
529 }
530
VisitLoadTransform(Node * node)531 void InstructionSelector::VisitLoadTransform(Node* node) {
532 LoadTransformParameters params = LoadTransformParametersOf(node->op());
533 InstructionCode opcode;
534 switch (params.transformation) {
535 case LoadTransformation::kS128Load8Splat:
536 opcode = kIA32S128Load8Splat;
537 break;
538 case LoadTransformation::kS128Load16Splat:
539 opcode = kIA32S128Load16Splat;
540 break;
541 case LoadTransformation::kS128Load32Splat:
542 opcode = kIA32S128Load32Splat;
543 break;
544 case LoadTransformation::kS128Load64Splat:
545 opcode = kIA32S128Load64Splat;
546 break;
547 case LoadTransformation::kS128Load8x8S:
548 opcode = kIA32S128Load8x8S;
549 break;
550 case LoadTransformation::kS128Load8x8U:
551 opcode = kIA32S128Load8x8U;
552 break;
553 case LoadTransformation::kS128Load16x4S:
554 opcode = kIA32S128Load16x4S;
555 break;
556 case LoadTransformation::kS128Load16x4U:
557 opcode = kIA32S128Load16x4U;
558 break;
559 case LoadTransformation::kS128Load32x2S:
560 opcode = kIA32S128Load32x2S;
561 break;
562 case LoadTransformation::kS128Load32x2U:
563 opcode = kIA32S128Load32x2U;
564 break;
565 case LoadTransformation::kS128Load32Zero:
566 opcode = kIA32Movss;
567 break;
568 case LoadTransformation::kS128Load64Zero:
569 opcode = kIA32Movsd;
570 break;
571 default:
572 UNREACHABLE();
573 }
574
575 // IA32 supports unaligned loads.
576 DCHECK_NE(params.kind, MemoryAccessKind::kUnaligned);
577 // Trap handler is not supported on IA32.
578 DCHECK_NE(params.kind, MemoryAccessKind::kProtected);
579
580 IA32OperandGenerator g(this);
581 InstructionOperand outputs[1];
582 outputs[0] = g.DefineAsRegister(node);
583 InstructionOperand inputs[3];
584 size_t input_count = 0;
585 AddressingMode mode =
586 g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
587 InstructionCode code = opcode | AddressingModeField::encode(mode);
588 Emit(code, 1, outputs, input_count, inputs);
589 }
590
VisitLoad(Node * node,Node * value,InstructionCode opcode)591 void InstructionSelector::VisitLoad(Node* node, Node* value,
592 InstructionCode opcode) {
593 IA32OperandGenerator g(this);
594 InstructionOperand outputs[1];
595 outputs[0] = g.DefineAsRegister(node);
596 InstructionOperand inputs[3];
597 size_t input_count = 0;
598 AddressingMode mode =
599 g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count);
600 InstructionCode code = opcode | AddressingModeField::encode(mode);
601 Emit(code, 1, outputs, input_count, inputs);
602 }
603
VisitLoad(Node * node)604 void InstructionSelector::VisitLoad(Node* node) {
605 LoadRepresentation load_rep = LoadRepresentationOf(node->op());
606 DCHECK(!load_rep.IsMapWord());
607 VisitLoad(node, node, GetLoadOpcode(load_rep));
608 }
609
VisitProtectedLoad(Node * node)610 void InstructionSelector::VisitProtectedLoad(Node* node) {
611 // TODO(eholk)
612 UNIMPLEMENTED();
613 }
614
615 namespace {
616
GetStoreOpcode(MachineRepresentation rep)617 ArchOpcode GetStoreOpcode(MachineRepresentation rep) {
618 switch (rep) {
619 case MachineRepresentation::kFloat32:
620 return kIA32Movss;
621 case MachineRepresentation::kFloat64:
622 return kIA32Movsd;
623 case MachineRepresentation::kBit: // Fall through.
624 case MachineRepresentation::kWord8:
625 return kIA32Movb;
626 case MachineRepresentation::kWord16:
627 return kIA32Movw;
628 case MachineRepresentation::kTaggedSigned: // Fall through.
629 case MachineRepresentation::kTaggedPointer: // Fall through.
630 case MachineRepresentation::kTagged: // Fall through.
631 case MachineRepresentation::kWord32:
632 return kIA32Movl;
633 case MachineRepresentation::kSimd128:
634 return kIA32Movdqu;
635 case MachineRepresentation::kCompressedPointer: // Fall through.
636 case MachineRepresentation::kCompressed: // Fall through.
637 case MachineRepresentation::kSandboxedPointer: // Fall through.
638 case MachineRepresentation::kWord64: // Fall through.
639 case MachineRepresentation::kMapWord: // Fall through.
640 case MachineRepresentation::kNone:
641 UNREACHABLE();
642 }
643 }
644
GetSeqCstStoreOpcode(MachineRepresentation rep)645 ArchOpcode GetSeqCstStoreOpcode(MachineRepresentation rep) {
646 switch (rep) {
647 case MachineRepresentation::kWord8:
648 return kAtomicExchangeInt8;
649 case MachineRepresentation::kWord16:
650 return kAtomicExchangeInt16;
651 case MachineRepresentation::kTaggedSigned: // Fall through.
652 case MachineRepresentation::kTaggedPointer: // Fall through.
653 case MachineRepresentation::kTagged: // Fall through.
654 case MachineRepresentation::kWord32:
655 return kAtomicExchangeWord32;
656 default:
657 UNREACHABLE();
658 }
659 }
660
VisitAtomicExchange(InstructionSelector * selector,Node * node,ArchOpcode opcode,MachineRepresentation rep)661 void VisitAtomicExchange(InstructionSelector* selector, Node* node,
662 ArchOpcode opcode, MachineRepresentation rep) {
663 IA32OperandGenerator g(selector);
664 Node* base = node->InputAt(0);
665 Node* index = node->InputAt(1);
666 Node* value = node->InputAt(2);
667
668 AddressingMode addressing_mode;
669 InstructionOperand value_operand = (rep == MachineRepresentation::kWord8)
670 ? g.UseFixed(value, edx)
671 : g.UseUniqueRegister(value);
672 InstructionOperand inputs[] = {
673 value_operand, g.UseUniqueRegister(base),
674 g.GetEffectiveIndexOperand(index, &addressing_mode)};
675 InstructionOperand outputs[] = {
676 (rep == MachineRepresentation::kWord8)
677 // Using DefineSameAsFirst requires the register to be unallocated.
678 ? g.DefineAsFixed(node, edx)
679 : g.DefineSameAsFirst(node)};
680 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
681 selector->Emit(code, 1, outputs, arraysize(inputs), inputs);
682 }
683
VisitStoreCommon(InstructionSelector * selector,Node * node,StoreRepresentation store_rep,base::Optional<AtomicMemoryOrder> atomic_order)684 void VisitStoreCommon(InstructionSelector* selector, Node* node,
685 StoreRepresentation store_rep,
686 base::Optional<AtomicMemoryOrder> atomic_order) {
687 IA32OperandGenerator g(selector);
688 Node* base = node->InputAt(0);
689 Node* index = node->InputAt(1);
690 Node* value = node->InputAt(2);
691
692 WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind();
693 MachineRepresentation rep = store_rep.representation();
694 const bool is_seqcst =
695 atomic_order && *atomic_order == AtomicMemoryOrder::kSeqCst;
696
697 if (FLAG_enable_unconditional_write_barriers && CanBeTaggedPointer(rep)) {
698 write_barrier_kind = kFullWriteBarrier;
699 }
700
701 if (write_barrier_kind != kNoWriteBarrier && !FLAG_disable_write_barriers) {
702 DCHECK(CanBeTaggedPointer(rep));
703 AddressingMode addressing_mode;
704 InstructionOperand inputs[] = {
705 g.UseUniqueRegister(base),
706 g.GetEffectiveIndexOperand(index, &addressing_mode),
707 g.UseUniqueRegister(value)};
708 RecordWriteMode record_write_mode =
709 WriteBarrierKindToRecordWriteMode(write_barrier_kind);
710 InstructionOperand temps[] = {g.TempRegister(), g.TempRegister()};
711 size_t const temp_count = arraysize(temps);
712 InstructionCode code = is_seqcst ? kArchAtomicStoreWithWriteBarrier
713 : kArchStoreWithWriteBarrier;
714 code |= AddressingModeField::encode(addressing_mode);
715 code |= MiscField::encode(static_cast<int>(record_write_mode));
716 selector->Emit(code, 0, nullptr, arraysize(inputs), inputs, temp_count,
717 temps);
718 } else if (is_seqcst) {
719 VisitAtomicExchange(selector, node, GetSeqCstStoreOpcode(rep), rep);
720 } else {
721 // Release and non-atomic stores emit MOV.
722 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
723
724 InstructionOperand val;
725 if (g.CanBeImmediate(value)) {
726 val = g.UseImmediate(value);
727 } else if (!atomic_order && (rep == MachineRepresentation::kWord8 ||
728 rep == MachineRepresentation::kBit)) {
729 val = g.UseByteRegister(value);
730 } else {
731 val = g.UseRegister(value);
732 }
733
734 InstructionOperand inputs[4];
735 size_t input_count = 0;
736 AddressingMode addressing_mode =
737 g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
738 InstructionCode code =
739 GetStoreOpcode(rep) | AddressingModeField::encode(addressing_mode);
740 inputs[input_count++] = val;
741 selector->Emit(code, 0, static_cast<InstructionOperand*>(nullptr),
742 input_count, inputs);
743 }
744 }
745
746 } // namespace
747
VisitStore(Node * node)748 void InstructionSelector::VisitStore(Node* node) {
749 VisitStoreCommon(this, node, StoreRepresentationOf(node->op()),
750 base::nullopt);
751 }
752
VisitProtectedStore(Node * node)753 void InstructionSelector::VisitProtectedStore(Node* node) {
754 // TODO(eholk)
755 UNIMPLEMENTED();
756 }
757
VisitStoreLane(Node * node)758 void InstructionSelector::VisitStoreLane(Node* node) {
759 IA32OperandGenerator g(this);
760
761 StoreLaneParameters params = StoreLaneParametersOf(node->op());
762 InstructionCode opcode = kArchNop;
763 if (params.rep == MachineRepresentation::kWord8) {
764 opcode = kIA32Pextrb;
765 } else if (params.rep == MachineRepresentation::kWord16) {
766 opcode = kIA32Pextrw;
767 } else if (params.rep == MachineRepresentation::kWord32) {
768 opcode = kIA32S128Store32Lane;
769 } else if (params.rep == MachineRepresentation::kWord64) {
770 if (params.laneidx == 0) {
771 opcode = kIA32Movlps;
772 } else {
773 DCHECK_EQ(1, params.laneidx);
774 opcode = kIA32Movhps;
775 }
776 } else {
777 UNREACHABLE();
778 }
779
780 InstructionOperand inputs[4];
781 size_t input_count = 0;
782 AddressingMode addressing_mode =
783 g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count);
784 opcode |= AddressingModeField::encode(addressing_mode);
785
786 InstructionOperand value_operand = g.UseRegister(node->InputAt(2));
787 inputs[input_count++] = value_operand;
788 inputs[input_count++] = g.UseImmediate(params.laneidx);
789 DCHECK_GE(4, input_count);
790 Emit(opcode, 0, nullptr, input_count, inputs);
791 }
792
793 // Architecture supports unaligned access, therefore VisitLoad is used instead
VisitUnalignedLoad(Node * node)794 void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
795
796 // Architecture supports unaligned access, therefore VisitStore is used instead
VisitUnalignedStore(Node * node)797 void InstructionSelector::VisitUnalignedStore(Node* node) { UNREACHABLE(); }
798
799 namespace {
800
801 // Shared routine for multiple binary operations.
VisitBinop(InstructionSelector * selector,Node * node,InstructionCode opcode,FlagsContinuation * cont)802 void VisitBinop(InstructionSelector* selector, Node* node,
803 InstructionCode opcode, FlagsContinuation* cont) {
804 IA32OperandGenerator g(selector);
805 Int32BinopMatcher m(node);
806 Node* left = m.left().node();
807 Node* right = m.right().node();
808 InstructionOperand inputs[6];
809 size_t input_count = 0;
810 InstructionOperand outputs[1];
811 size_t output_count = 0;
812
813 // TODO(turbofan): match complex addressing modes.
814 if (left == right) {
815 // If both inputs refer to the same operand, enforce allocating a register
816 // for both of them to ensure that we don't end up generating code like
817 // this:
818 //
819 // mov eax, [ebp-0x10]
820 // add eax, [ebp-0x10]
821 // jo label
822 InstructionOperand const input = g.UseRegister(left);
823 inputs[input_count++] = input;
824 inputs[input_count++] = input;
825 } else if (g.CanBeImmediate(right)) {
826 inputs[input_count++] = g.UseRegister(left);
827 inputs[input_count++] = g.UseImmediate(right);
828 } else {
829 int effect_level = selector->GetEffectLevel(node, cont);
830 if (node->op()->HasProperty(Operator::kCommutative) &&
831 g.CanBeBetterLeftOperand(right) &&
832 (!g.CanBeBetterLeftOperand(left) ||
833 !g.CanBeMemoryOperand(opcode, node, right, effect_level))) {
834 std::swap(left, right);
835 }
836 if (g.CanBeMemoryOperand(opcode, node, right, effect_level)) {
837 inputs[input_count++] = g.UseRegister(left);
838 AddressingMode addressing_mode =
839 g.GetEffectiveAddressMemoryOperand(right, inputs, &input_count);
840 opcode |= AddressingModeField::encode(addressing_mode);
841 } else {
842 inputs[input_count++] = g.UseRegister(left);
843 inputs[input_count++] = g.Use(right);
844 }
845 }
846
847 outputs[output_count++] = g.DefineSameAsFirst(node);
848
849 DCHECK_NE(0u, input_count);
850 DCHECK_EQ(1u, output_count);
851 DCHECK_GE(arraysize(inputs), input_count);
852 DCHECK_GE(arraysize(outputs), output_count);
853
854 selector->EmitWithContinuation(opcode, output_count, outputs, input_count,
855 inputs, cont);
856 }
857
858 // Shared routine for multiple binary operations.
VisitBinop(InstructionSelector * selector,Node * node,InstructionCode opcode)859 void VisitBinop(InstructionSelector* selector, Node* node,
860 InstructionCode opcode) {
861 FlagsContinuation cont;
862 VisitBinop(selector, node, opcode, &cont);
863 }
864
865 } // namespace
866
VisitWord32And(Node * node)867 void InstructionSelector::VisitWord32And(Node* node) {
868 VisitBinop(this, node, kIA32And);
869 }
870
VisitWord32Or(Node * node)871 void InstructionSelector::VisitWord32Or(Node* node) {
872 VisitBinop(this, node, kIA32Or);
873 }
874
VisitWord32Xor(Node * node)875 void InstructionSelector::VisitWord32Xor(Node* node) {
876 IA32OperandGenerator g(this);
877 Int32BinopMatcher m(node);
878 if (m.right().Is(-1)) {
879 Emit(kIA32Not, g.DefineSameAsFirst(node), g.UseRegister(m.left().node()));
880 } else {
881 VisitBinop(this, node, kIA32Xor);
882 }
883 }
884
VisitStackPointerGreaterThan(Node * node,FlagsContinuation * cont)885 void InstructionSelector::VisitStackPointerGreaterThan(
886 Node* node, FlagsContinuation* cont) {
887 StackCheckKind kind = StackCheckKindOf(node->op());
888 InstructionCode opcode =
889 kArchStackPointerGreaterThan | MiscField::encode(static_cast<int>(kind));
890
891 int effect_level = GetEffectLevel(node, cont);
892
893 IA32OperandGenerator g(this);
894
895 // No outputs.
896 InstructionOperand* const outputs = nullptr;
897 const int output_count = 0;
898
899 // Applying an offset to this stack check requires a temp register. Offsets
900 // are only applied to the first stack check. If applying an offset, we must
901 // ensure the input and temp registers do not alias, thus kUniqueRegister.
902 InstructionOperand temps[] = {g.TempRegister()};
903 const int temp_count = (kind == StackCheckKind::kJSFunctionEntry) ? 1 : 0;
904 const auto register_mode = (kind == StackCheckKind::kJSFunctionEntry)
905 ? OperandGenerator::kUniqueRegister
906 : OperandGenerator::kRegister;
907
908 Node* const value = node->InputAt(0);
909 if (g.CanBeMemoryOperand(kIA32Cmp, node, value, effect_level)) {
910 DCHECK(value->opcode() == IrOpcode::kLoad ||
911 value->opcode() == IrOpcode::kLoadImmutable);
912
913 // GetEffectiveAddressMemoryOperand can create at most 3 inputs.
914 static constexpr int kMaxInputCount = 3;
915
916 size_t input_count = 0;
917 InstructionOperand inputs[kMaxInputCount];
918 AddressingMode addressing_mode = g.GetEffectiveAddressMemoryOperand(
919 value, inputs, &input_count, register_mode);
920 opcode |= AddressingModeField::encode(addressing_mode);
921 DCHECK_LE(input_count, kMaxInputCount);
922
923 EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
924 temp_count, temps, cont);
925 } else {
926 InstructionOperand inputs[] = {g.UseRegisterWithMode(value, register_mode)};
927 static constexpr int input_count = arraysize(inputs);
928 EmitWithContinuation(opcode, output_count, outputs, input_count, inputs,
929 temp_count, temps, cont);
930 }
931 }
932
933 // Shared routine for multiple shift operations.
VisitShift(InstructionSelector * selector,Node * node,ArchOpcode opcode)934 static inline void VisitShift(InstructionSelector* selector, Node* node,
935 ArchOpcode opcode) {
936 IA32OperandGenerator g(selector);
937 Node* left = node->InputAt(0);
938 Node* right = node->InputAt(1);
939
940 if (g.CanBeImmediate(right)) {
941 selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
942 g.UseImmediate(right));
943 } else {
944 selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(left),
945 g.UseFixed(right, ecx));
946 }
947 }
948
949 namespace {
950
VisitMulHigh(InstructionSelector * selector,Node * node,ArchOpcode opcode)951 void VisitMulHigh(InstructionSelector* selector, Node* node,
952 ArchOpcode opcode) {
953 IA32OperandGenerator g(selector);
954 InstructionOperand temps[] = {g.TempRegister(eax)};
955 selector->Emit(
956 opcode, g.DefineAsFixed(node, edx), g.UseFixed(node->InputAt(0), eax),
957 g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
958 }
959
VisitDiv(InstructionSelector * selector,Node * node,ArchOpcode opcode)960 void VisitDiv(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
961 IA32OperandGenerator g(selector);
962 InstructionOperand temps[] = {g.TempRegister(edx)};
963 selector->Emit(opcode, g.DefineAsFixed(node, eax),
964 g.UseFixed(node->InputAt(0), eax),
965 g.UseUnique(node->InputAt(1)), arraysize(temps), temps);
966 }
967
VisitMod(InstructionSelector * selector,Node * node,ArchOpcode opcode)968 void VisitMod(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
969 IA32OperandGenerator g(selector);
970 InstructionOperand temps[] = {g.TempRegister(eax)};
971 selector->Emit(opcode, g.DefineAsFixed(node, edx),
972 g.UseFixed(node->InputAt(0), eax),
973 g.UseUnique(node->InputAt(1)), arraysize(temps), temps);
974 }
975
EmitLea(InstructionSelector * selector,Node * result,Node * index,int scale,Node * base,Node * displacement,DisplacementMode displacement_mode)976 void EmitLea(InstructionSelector* selector, Node* result, Node* index,
977 int scale, Node* base, Node* displacement,
978 DisplacementMode displacement_mode) {
979 IA32OperandGenerator g(selector);
980 InstructionOperand inputs[4];
981 size_t input_count = 0;
982 AddressingMode mode =
983 g.GenerateMemoryOperandInputs(index, scale, base, displacement,
984 displacement_mode, inputs, &input_count);
985
986 DCHECK_NE(0u, input_count);
987 DCHECK_GE(arraysize(inputs), input_count);
988
989 InstructionOperand outputs[1];
990 outputs[0] = g.DefineAsRegister(result);
991
992 InstructionCode opcode = AddressingModeField::encode(mode) | kIA32Lea;
993
994 selector->Emit(opcode, 1, outputs, input_count, inputs);
995 }
996
997 } // namespace
998
VisitWord32Shl(Node * node)999 void InstructionSelector::VisitWord32Shl(Node* node) {
1000 Int32ScaleMatcher m(node, true);
1001 if (m.matches()) {
1002 Node* index = node->InputAt(0);
1003 Node* base = m.power_of_two_plus_one() ? index : nullptr;
1004 EmitLea(this, node, index, m.scale(), base, nullptr, kPositiveDisplacement);
1005 return;
1006 }
1007 VisitShift(this, node, kIA32Shl);
1008 }
1009
VisitWord32Shr(Node * node)1010 void InstructionSelector::VisitWord32Shr(Node* node) {
1011 VisitShift(this, node, kIA32Shr);
1012 }
1013
VisitWord32Sar(Node * node)1014 void InstructionSelector::VisitWord32Sar(Node* node) {
1015 VisitShift(this, node, kIA32Sar);
1016 }
1017
VisitInt32PairAdd(Node * node)1018 void InstructionSelector::VisitInt32PairAdd(Node* node) {
1019 IA32OperandGenerator g(this);
1020
1021 Node* projection1 = NodeProperties::FindProjection(node, 1);
1022 if (projection1) {
1023 // We use UseUniqueRegister here to avoid register sharing with the temp
1024 // register.
1025 InstructionOperand inputs[] = {
1026 g.UseRegister(node->InputAt(0)),
1027 g.UseUniqueRegisterOrSlotOrConstant(node->InputAt(1)),
1028 g.UseRegister(node->InputAt(2)), g.UseUniqueRegister(node->InputAt(3))};
1029
1030 InstructionOperand outputs[] = {g.DefineSameAsFirst(node),
1031 g.DefineAsRegister(projection1)};
1032
1033 InstructionOperand temps[] = {g.TempRegister()};
1034
1035 Emit(kIA32AddPair, 2, outputs, 4, inputs, 1, temps);
1036 } else {
1037 // The high word of the result is not used, so we emit the standard 32 bit
1038 // instruction.
1039 Emit(kIA32Add, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
1040 g.Use(node->InputAt(2)));
1041 }
1042 }
1043
VisitInt32PairSub(Node * node)1044 void InstructionSelector::VisitInt32PairSub(Node* node) {
1045 IA32OperandGenerator g(this);
1046
1047 Node* projection1 = NodeProperties::FindProjection(node, 1);
1048 if (projection1) {
1049 // We use UseUniqueRegister here to avoid register sharing with the temp
1050 // register.
1051 InstructionOperand inputs[] = {
1052 g.UseRegister(node->InputAt(0)),
1053 g.UseUniqueRegisterOrSlotOrConstant(node->InputAt(1)),
1054 g.UseRegister(node->InputAt(2)), g.UseUniqueRegister(node->InputAt(3))};
1055
1056 InstructionOperand outputs[] = {g.DefineSameAsFirst(node),
1057 g.DefineAsRegister(projection1)};
1058
1059 InstructionOperand temps[] = {g.TempRegister()};
1060
1061 Emit(kIA32SubPair, 2, outputs, 4, inputs, 1, temps);
1062 } else {
1063 // The high word of the result is not used, so we emit the standard 32 bit
1064 // instruction.
1065 Emit(kIA32Sub, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
1066 g.Use(node->InputAt(2)));
1067 }
1068 }
1069
VisitInt32PairMul(Node * node)1070 void InstructionSelector::VisitInt32PairMul(Node* node) {
1071 IA32OperandGenerator g(this);
1072
1073 Node* projection1 = NodeProperties::FindProjection(node, 1);
1074 if (projection1) {
1075 // InputAt(3) explicitly shares ecx with OutputRegister(1) to save one
1076 // register and one mov instruction.
1077 InstructionOperand inputs[] = {
1078 g.UseUnique(node->InputAt(0)),
1079 g.UseUniqueRegisterOrSlotOrConstant(node->InputAt(1)),
1080 g.UseUniqueRegister(node->InputAt(2)),
1081 g.UseFixed(node->InputAt(3), ecx)};
1082
1083 InstructionOperand outputs[] = {
1084 g.DefineAsFixed(node, eax),
1085 g.DefineAsFixed(NodeProperties::FindProjection(node, 1), ecx)};
1086
1087 InstructionOperand temps[] = {g.TempRegister(edx)};
1088
1089 Emit(kIA32MulPair, 2, outputs, 4, inputs, 1, temps);
1090 } else {
1091 // The high word of the result is not used, so we emit the standard 32 bit
1092 // instruction.
1093 Emit(kIA32Imul, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
1094 g.Use(node->InputAt(2)));
1095 }
1096 }
1097
VisitWord32PairShift(InstructionSelector * selector,InstructionCode opcode,Node * node)1098 void VisitWord32PairShift(InstructionSelector* selector, InstructionCode opcode,
1099 Node* node) {
1100 IA32OperandGenerator g(selector);
1101
1102 Node* shift = node->InputAt(2);
1103 InstructionOperand shift_operand;
1104 if (g.CanBeImmediate(shift)) {
1105 shift_operand = g.UseImmediate(shift);
1106 } else {
1107 shift_operand = g.UseFixed(shift, ecx);
1108 }
1109 InstructionOperand inputs[] = {g.UseFixed(node->InputAt(0), eax),
1110 g.UseFixed(node->InputAt(1), edx),
1111 shift_operand};
1112
1113 InstructionOperand outputs[2];
1114 InstructionOperand temps[1];
1115 int32_t output_count = 0;
1116 int32_t temp_count = 0;
1117 outputs[output_count++] = g.DefineAsFixed(node, eax);
1118 Node* projection1 = NodeProperties::FindProjection(node, 1);
1119 if (projection1) {
1120 outputs[output_count++] = g.DefineAsFixed(projection1, edx);
1121 } else {
1122 temps[temp_count++] = g.TempRegister(edx);
1123 }
1124
1125 selector->Emit(opcode, output_count, outputs, 3, inputs, temp_count, temps);
1126 }
1127
VisitWord32PairShl(Node * node)1128 void InstructionSelector::VisitWord32PairShl(Node* node) {
1129 VisitWord32PairShift(this, kIA32ShlPair, node);
1130 }
1131
VisitWord32PairShr(Node * node)1132 void InstructionSelector::VisitWord32PairShr(Node* node) {
1133 VisitWord32PairShift(this, kIA32ShrPair, node);
1134 }
1135
VisitWord32PairSar(Node * node)1136 void InstructionSelector::VisitWord32PairSar(Node* node) {
1137 VisitWord32PairShift(this, kIA32SarPair, node);
1138 }
1139
VisitWord32Rol(Node * node)1140 void InstructionSelector::VisitWord32Rol(Node* node) {
1141 VisitShift(this, node, kIA32Rol);
1142 }
1143
VisitWord32Ror(Node * node)1144 void InstructionSelector::VisitWord32Ror(Node* node) {
1145 VisitShift(this, node, kIA32Ror);
1146 }
1147
1148 #define RO_OP_LIST(V) \
1149 V(Word32Clz, kIA32Lzcnt) \
1150 V(Word32Ctz, kIA32Tzcnt) \
1151 V(Word32Popcnt, kIA32Popcnt) \
1152 V(ChangeFloat32ToFloat64, kIA32Float32ToFloat64) \
1153 V(RoundInt32ToFloat32, kSSEInt32ToFloat32) \
1154 V(ChangeInt32ToFloat64, kSSEInt32ToFloat64) \
1155 V(TruncateFloat32ToInt32, kIA32Float32ToInt32) \
1156 V(ChangeFloat64ToInt32, kIA32Float64ToInt32) \
1157 V(TruncateFloat64ToFloat32, kIA32Float64ToFloat32) \
1158 V(RoundFloat64ToInt32, kIA32Float64ToInt32) \
1159 V(BitcastFloat32ToInt32, kIA32BitcastFI) \
1160 V(BitcastInt32ToFloat32, kIA32BitcastIF) \
1161 V(Float32Sqrt, kIA32Float32Sqrt) \
1162 V(Float64Sqrt, kIA32Float64Sqrt) \
1163 V(Float64ExtractLowWord32, kIA32Float64ExtractLowWord32) \
1164 V(Float64ExtractHighWord32, kIA32Float64ExtractHighWord32) \
1165 V(SignExtendWord8ToInt32, kIA32Movsxbl) \
1166 V(SignExtendWord16ToInt32, kIA32Movsxwl) \
1167 V(F64x2Sqrt, kIA32F64x2Sqrt)
1168
1169 #define RO_WITH_TEMP_OP_LIST(V) V(ChangeUint32ToFloat64, kIA32Uint32ToFloat64)
1170
1171 #define RO_WITH_TEMP_SIMD_OP_LIST(V) \
1172 V(TruncateFloat32ToUint32, kIA32Float32ToUint32) \
1173 V(ChangeFloat64ToUint32, kIA32Float64ToUint32) \
1174 V(TruncateFloat64ToUint32, kIA32Float64ToUint32)
1175
1176 #define RR_OP_LIST(V) \
1177 V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
1178 V(Float32RoundDown, kIA32Float32Round | MiscField::encode(kRoundDown)) \
1179 V(Float64RoundDown, kIA32Float64Round | MiscField::encode(kRoundDown)) \
1180 V(Float32RoundUp, kIA32Float32Round | MiscField::encode(kRoundUp)) \
1181 V(Float64RoundUp, kIA32Float64Round | MiscField::encode(kRoundUp)) \
1182 V(Float32RoundTruncate, kIA32Float32Round | MiscField::encode(kRoundToZero)) \
1183 V(Float64RoundTruncate, kIA32Float64Round | MiscField::encode(kRoundToZero)) \
1184 V(Float32RoundTiesEven, \
1185 kIA32Float32Round | MiscField::encode(kRoundToNearest)) \
1186 V(Float64RoundTiesEven, \
1187 kIA32Float64Round | MiscField::encode(kRoundToNearest)) \
1188 V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \
1189 V(F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \
1190 V(F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero)) \
1191 V(F32x4NearestInt, kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \
1192 V(F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \
1193 V(F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \
1194 V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \
1195 V(F64x2NearestInt, kIA32F64x2Round | MiscField::encode(kRoundToNearest))
1196
1197 #define RRO_FLOAT_OP_LIST(V) \
1198 V(Float32Add, kFloat32Add) \
1199 V(Float64Add, kFloat64Add) \
1200 V(Float32Sub, kFloat32Sub) \
1201 V(Float64Sub, kFloat64Sub) \
1202 V(Float32Mul, kFloat32Mul) \
1203 V(Float64Mul, kFloat64Mul) \
1204 V(Float32Div, kFloat32Div) \
1205 V(Float64Div, kFloat64Div) \
1206 V(F64x2Add, kIA32F64x2Add) \
1207 V(F64x2Sub, kIA32F64x2Sub) \
1208 V(F64x2Mul, kIA32F64x2Mul) \
1209 V(F64x2Div, kIA32F64x2Div) \
1210 V(F64x2Eq, kIA32F64x2Eq) \
1211 V(F64x2Ne, kIA32F64x2Ne) \
1212 V(F64x2Lt, kIA32F64x2Lt) \
1213 V(F64x2Le, kIA32F64x2Le)
1214
1215 #define FLOAT_UNOP_LIST(V) \
1216 V(Float32Abs, kFloat32Abs) \
1217 V(Float64Abs, kFloat64Abs) \
1218 V(Float32Neg, kFloat32Neg) \
1219 V(Float64Neg, kFloat64Neg) \
1220 V(F32x4Abs, kFloat32Abs) \
1221 V(F32x4Neg, kFloat32Neg) \
1222 V(F64x2Abs, kFloat64Abs) \
1223 V(F64x2Neg, kFloat64Neg)
1224
1225 #define RO_VISITOR(Name, opcode) \
1226 void InstructionSelector::Visit##Name(Node* node) { \
1227 VisitRO(this, node, opcode); \
1228 }
1229 RO_OP_LIST(RO_VISITOR)
1230 #undef RO_VISITOR
1231 #undef RO_OP_LIST
1232
1233 #define RO_WITH_TEMP_VISITOR(Name, opcode) \
1234 void InstructionSelector::Visit##Name(Node* node) { \
1235 VisitROWithTemp(this, node, opcode); \
1236 }
RO_WITH_TEMP_OP_LIST(RO_WITH_TEMP_VISITOR)1237 RO_WITH_TEMP_OP_LIST(RO_WITH_TEMP_VISITOR)
1238 #undef RO_WITH_TEMP_VISITOR
1239 #undef RO_WITH_TEMP_OP_LIST
1240
1241 #define RO_WITH_TEMP_SIMD_VISITOR(Name, opcode) \
1242 void InstructionSelector::Visit##Name(Node* node) { \
1243 VisitROWithTempSimd(this, node, opcode); \
1244 }
1245 RO_WITH_TEMP_SIMD_OP_LIST(RO_WITH_TEMP_SIMD_VISITOR)
1246 #undef RO_WITH_TEMP_SIMD_VISITOR
1247 #undef RO_WITH_TEMP_SIMD_OP_LIST
1248
1249 #define RR_VISITOR(Name, opcode) \
1250 void InstructionSelector::Visit##Name(Node* node) { \
1251 VisitRR(this, node, opcode); \
1252 }
1253 RR_OP_LIST(RR_VISITOR)
1254 #undef RR_VISITOR
1255 #undef RR_OP_LIST
1256
1257 #define RRO_FLOAT_VISITOR(Name, opcode) \
1258 void InstructionSelector::Visit##Name(Node* node) { \
1259 VisitRROFloat(this, node, opcode); \
1260 }
1261 RRO_FLOAT_OP_LIST(RRO_FLOAT_VISITOR)
1262 #undef RRO_FLOAT_VISITOR
1263 #undef RRO_FLOAT_OP_LIST
1264
1265 #define FLOAT_UNOP_VISITOR(Name, opcode) \
1266 void InstructionSelector::Visit##Name(Node* node) { \
1267 VisitFloatUnop(this, node, node->InputAt(0), opcode); \
1268 }
1269 FLOAT_UNOP_LIST(FLOAT_UNOP_VISITOR)
1270 #undef FLOAT_UNOP_VISITOR
1271 #undef FLOAT_UNOP_LIST
1272
1273 void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); }
1274
VisitWord64ReverseBytes(Node * node)1275 void InstructionSelector::VisitWord64ReverseBytes(Node* node) { UNREACHABLE(); }
1276
VisitWord32ReverseBytes(Node * node)1277 void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
1278 IA32OperandGenerator g(this);
1279 Emit(kIA32Bswap, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
1280 }
1281
VisitSimd128ReverseBytes(Node * node)1282 void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
1283 UNREACHABLE();
1284 }
1285
VisitInt32Add(Node * node)1286 void InstructionSelector::VisitInt32Add(Node* node) {
1287 IA32OperandGenerator g(this);
1288
1289 // Try to match the Add to a lea pattern
1290 BaseWithIndexAndDisplacement32Matcher m(node);
1291 if (m.matches() &&
1292 (m.displacement() == nullptr || g.CanBeImmediate(m.displacement()))) {
1293 InstructionOperand inputs[4];
1294 size_t input_count = 0;
1295 AddressingMode mode = g.GenerateMemoryOperandInputs(
1296 m.index(), m.scale(), m.base(), m.displacement(), m.displacement_mode(),
1297 inputs, &input_count);
1298
1299 DCHECK_NE(0u, input_count);
1300 DCHECK_GE(arraysize(inputs), input_count);
1301
1302 InstructionOperand outputs[1];
1303 outputs[0] = g.DefineAsRegister(node);
1304
1305 InstructionCode opcode = AddressingModeField::encode(mode) | kIA32Lea;
1306 Emit(opcode, 1, outputs, input_count, inputs);
1307 return;
1308 }
1309
1310 // No lea pattern match, use add
1311 VisitBinop(this, node, kIA32Add);
1312 }
1313
VisitInt32Sub(Node * node)1314 void InstructionSelector::VisitInt32Sub(Node* node) {
1315 IA32OperandGenerator g(this);
1316 Int32BinopMatcher m(node);
1317 if (m.left().Is(0)) {
1318 Emit(kIA32Neg, g.DefineSameAsFirst(node), g.Use(m.right().node()));
1319 } else {
1320 VisitBinop(this, node, kIA32Sub);
1321 }
1322 }
1323
VisitInt32Mul(Node * node)1324 void InstructionSelector::VisitInt32Mul(Node* node) {
1325 Int32ScaleMatcher m(node, true);
1326 if (m.matches()) {
1327 Node* index = node->InputAt(0);
1328 Node* base = m.power_of_two_plus_one() ? index : nullptr;
1329 EmitLea(this, node, index, m.scale(), base, nullptr, kPositiveDisplacement);
1330 return;
1331 }
1332 IA32OperandGenerator g(this);
1333 Node* left = node->InputAt(0);
1334 Node* right = node->InputAt(1);
1335 if (g.CanBeImmediate(right)) {
1336 Emit(kIA32Imul, g.DefineAsRegister(node), g.Use(left),
1337 g.UseImmediate(right));
1338 } else {
1339 if (g.CanBeBetterLeftOperand(right)) {
1340 std::swap(left, right);
1341 }
1342 Emit(kIA32Imul, g.DefineSameAsFirst(node), g.UseRegister(left),
1343 g.Use(right));
1344 }
1345 }
1346
VisitInt32MulHigh(Node * node)1347 void InstructionSelector::VisitInt32MulHigh(Node* node) {
1348 VisitMulHigh(this, node, kIA32ImulHigh);
1349 }
1350
VisitUint32MulHigh(Node * node)1351 void InstructionSelector::VisitUint32MulHigh(Node* node) {
1352 VisitMulHigh(this, node, kIA32UmulHigh);
1353 }
1354
VisitInt32Div(Node * node)1355 void InstructionSelector::VisitInt32Div(Node* node) {
1356 VisitDiv(this, node, kIA32Idiv);
1357 }
1358
VisitUint32Div(Node * node)1359 void InstructionSelector::VisitUint32Div(Node* node) {
1360 VisitDiv(this, node, kIA32Udiv);
1361 }
1362
VisitInt32Mod(Node * node)1363 void InstructionSelector::VisitInt32Mod(Node* node) {
1364 VisitMod(this, node, kIA32Idiv);
1365 }
1366
VisitUint32Mod(Node * node)1367 void InstructionSelector::VisitUint32Mod(Node* node) {
1368 VisitMod(this, node, kIA32Udiv);
1369 }
1370
VisitRoundUint32ToFloat32(Node * node)1371 void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) {
1372 IA32OperandGenerator g(this);
1373 InstructionOperand temps[] = {g.TempRegister()};
1374 Emit(kIA32Uint32ToFloat32, g.DefineAsRegister(node), g.Use(node->InputAt(0)),
1375 arraysize(temps), temps);
1376 }
1377
VisitFloat64Mod(Node * node)1378 void InstructionSelector::VisitFloat64Mod(Node* node) {
1379 IA32OperandGenerator g(this);
1380 InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister()};
1381 Emit(kIA32Float64Mod, g.DefineSameAsFirst(node),
1382 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
1383 arraysize(temps), temps);
1384 }
1385
VisitFloat32Max(Node * node)1386 void InstructionSelector::VisitFloat32Max(Node* node) {
1387 IA32OperandGenerator g(this);
1388 InstructionOperand temps[] = {g.TempRegister()};
1389 Emit(kIA32Float32Max, g.DefineSameAsFirst(node),
1390 g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
1391 arraysize(temps), temps);
1392 }
1393
VisitFloat64Max(Node * node)1394 void InstructionSelector::VisitFloat64Max(Node* node) {
1395 IA32OperandGenerator g(this);
1396 InstructionOperand temps[] = {g.TempRegister()};
1397 Emit(kIA32Float64Max, g.DefineSameAsFirst(node),
1398 g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
1399 arraysize(temps), temps);
1400 }
1401
VisitFloat32Min(Node * node)1402 void InstructionSelector::VisitFloat32Min(Node* node) {
1403 IA32OperandGenerator g(this);
1404 InstructionOperand temps[] = {g.TempRegister()};
1405 Emit(kIA32Float32Min, g.DefineSameAsFirst(node),
1406 g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
1407 arraysize(temps), temps);
1408 }
1409
VisitFloat64Min(Node * node)1410 void InstructionSelector::VisitFloat64Min(Node* node) {
1411 IA32OperandGenerator g(this);
1412 InstructionOperand temps[] = {g.TempRegister()};
1413 Emit(kIA32Float64Min, g.DefineSameAsFirst(node),
1414 g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1)),
1415 arraysize(temps), temps);
1416 }
1417
VisitFloat64RoundTiesAway(Node * node)1418 void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
1419 UNREACHABLE();
1420 }
1421
VisitFloat64Ieee754Binop(Node * node,InstructionCode opcode)1422 void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
1423 InstructionCode opcode) {
1424 IA32OperandGenerator g(this);
1425 Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
1426 g.UseRegister(node->InputAt(1)))
1427 ->MarkAsCall();
1428 }
1429
VisitFloat64Ieee754Unop(Node * node,InstructionCode opcode)1430 void InstructionSelector::VisitFloat64Ieee754Unop(Node* node,
1431 InstructionCode opcode) {
1432 IA32OperandGenerator g(this);
1433 Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)))
1434 ->MarkAsCall();
1435 }
1436
EmitPrepareArguments(ZoneVector<PushParameter> * arguments,const CallDescriptor * call_descriptor,Node * node)1437 void InstructionSelector::EmitPrepareArguments(
1438 ZoneVector<PushParameter>* arguments, const CallDescriptor* call_descriptor,
1439 Node* node) {
1440 IA32OperandGenerator g(this);
1441
1442 // Prepare for C function call.
1443 if (call_descriptor->IsCFunctionCall()) {
1444 InstructionOperand temps[] = {g.TempRegister()};
1445 size_t const temp_count = arraysize(temps);
1446 Emit(kArchPrepareCallCFunction | MiscField::encode(static_cast<int>(
1447 call_descriptor->ParameterCount())),
1448 0, nullptr, 0, nullptr, temp_count, temps);
1449
1450 // Poke any stack arguments.
1451 for (size_t n = 0; n < arguments->size(); ++n) {
1452 PushParameter input = (*arguments)[n];
1453 if (input.node) {
1454 int const slot = static_cast<int>(n);
1455 InstructionOperand value = g.CanBeImmediate(node)
1456 ? g.UseImmediate(input.node)
1457 : g.UseRegister(input.node);
1458 Emit(kIA32Poke | MiscField::encode(slot), g.NoOutput(), value);
1459 }
1460 }
1461 } else {
1462 // Push any stack arguments.
1463 int effect_level = GetEffectLevel(node);
1464 int stack_decrement = 0;
1465 for (PushParameter input : base::Reversed(*arguments)) {
1466 stack_decrement += kSystemPointerSize;
1467 // Skip holes in the param array. These represent both extra slots for
1468 // multi-slot values and padding slots for alignment.
1469 if (input.node == nullptr) continue;
1470 InstructionOperand decrement = g.UseImmediate(stack_decrement);
1471 stack_decrement = 0;
1472 if (g.CanBeImmediate(input.node)) {
1473 Emit(kIA32Push, g.NoOutput(), decrement, g.UseImmediate(input.node));
1474 } else if (IsSupported(INTEL_ATOM) ||
1475 sequence()->IsFP(GetVirtualRegister(input.node))) {
1476 // TODO(bbudge): IA32Push cannot handle stack->stack double moves
1477 // because there is no way to encode fixed double slots.
1478 Emit(kIA32Push, g.NoOutput(), decrement, g.UseRegister(input.node));
1479 } else if (g.CanBeMemoryOperand(kIA32Push, node, input.node,
1480 effect_level)) {
1481 InstructionOperand outputs[1];
1482 InstructionOperand inputs[5];
1483 size_t input_count = 0;
1484 inputs[input_count++] = decrement;
1485 AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
1486 input.node, inputs, &input_count);
1487 InstructionCode opcode = kIA32Push | AddressingModeField::encode(mode);
1488 Emit(opcode, 0, outputs, input_count, inputs);
1489 } else {
1490 Emit(kIA32Push, g.NoOutput(), decrement, g.UseAny(input.node));
1491 }
1492 }
1493 }
1494 }
1495
EmitPrepareResults(ZoneVector<PushParameter> * results,const CallDescriptor * call_descriptor,Node * node)1496 void InstructionSelector::EmitPrepareResults(
1497 ZoneVector<PushParameter>* results, const CallDescriptor* call_descriptor,
1498 Node* node) {
1499 IA32OperandGenerator g(this);
1500
1501 for (PushParameter output : *results) {
1502 if (!output.location.IsCallerFrameSlot()) continue;
1503 // Skip any alignment holes in nodes.
1504 if (output.node != nullptr) {
1505 DCHECK(!call_descriptor->IsCFunctionCall());
1506 if (output.location.GetType() == MachineType::Float32()) {
1507 MarkAsFloat32(output.node);
1508 } else if (output.location.GetType() == MachineType::Float64()) {
1509 MarkAsFloat64(output.node);
1510 } else if (output.location.GetType() == MachineType::Simd128()) {
1511 MarkAsSimd128(output.node);
1512 }
1513 int offset = call_descriptor->GetOffsetToReturns();
1514 int reverse_slot = -output.location.GetLocation() - offset;
1515 Emit(kIA32Peek, g.DefineAsRegister(output.node),
1516 g.UseImmediate(reverse_slot));
1517 }
1518 }
1519 }
1520
IsTailCallAddressImmediate()1521 bool InstructionSelector::IsTailCallAddressImmediate() { return true; }
1522
1523 namespace {
1524
VisitCompareWithMemoryOperand(InstructionSelector * selector,InstructionCode opcode,Node * left,InstructionOperand right,FlagsContinuation * cont)1525 void VisitCompareWithMemoryOperand(InstructionSelector* selector,
1526 InstructionCode opcode, Node* left,
1527 InstructionOperand right,
1528 FlagsContinuation* cont) {
1529 DCHECK(left->opcode() == IrOpcode::kLoad ||
1530 left->opcode() == IrOpcode::kLoadImmutable);
1531 IA32OperandGenerator g(selector);
1532 size_t input_count = 0;
1533 InstructionOperand inputs[4];
1534 AddressingMode addressing_mode =
1535 g.GetEffectiveAddressMemoryOperand(left, inputs, &input_count);
1536 opcode |= AddressingModeField::encode(addressing_mode);
1537 inputs[input_count++] = right;
1538
1539 selector->EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont);
1540 }
1541
1542 // Shared routine for multiple compare operations.
VisitCompare(InstructionSelector * selector,InstructionCode opcode,InstructionOperand left,InstructionOperand right,FlagsContinuation * cont)1543 void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
1544 InstructionOperand left, InstructionOperand right,
1545 FlagsContinuation* cont) {
1546 selector->EmitWithContinuation(opcode, left, right, cont);
1547 }
1548
1549 // Shared routine for multiple compare operations.
VisitCompare(InstructionSelector * selector,InstructionCode opcode,Node * left,Node * right,FlagsContinuation * cont,bool commutative)1550 void VisitCompare(InstructionSelector* selector, InstructionCode opcode,
1551 Node* left, Node* right, FlagsContinuation* cont,
1552 bool commutative) {
1553 IA32OperandGenerator g(selector);
1554 if (commutative && g.CanBeBetterLeftOperand(right)) {
1555 std::swap(left, right);
1556 }
1557 VisitCompare(selector, opcode, g.UseRegister(left), g.Use(right), cont);
1558 }
1559
MachineTypeForNarrow(Node * node,Node * hint_node)1560 MachineType MachineTypeForNarrow(Node* node, Node* hint_node) {
1561 if (hint_node->opcode() == IrOpcode::kLoad ||
1562 hint_node->opcode() == IrOpcode::kLoadImmutable) {
1563 MachineType hint = LoadRepresentationOf(hint_node->op());
1564 if (node->opcode() == IrOpcode::kInt32Constant ||
1565 node->opcode() == IrOpcode::kInt64Constant) {
1566 int64_t constant = node->opcode() == IrOpcode::kInt32Constant
1567 ? OpParameter<int32_t>(node->op())
1568 : OpParameter<int64_t>(node->op());
1569 if (hint == MachineType::Int8()) {
1570 if (constant >= std::numeric_limits<int8_t>::min() &&
1571 constant <= std::numeric_limits<int8_t>::max()) {
1572 return hint;
1573 }
1574 } else if (hint == MachineType::Uint8()) {
1575 if (constant >= std::numeric_limits<uint8_t>::min() &&
1576 constant <= std::numeric_limits<uint8_t>::max()) {
1577 return hint;
1578 }
1579 } else if (hint == MachineType::Int16()) {
1580 if (constant >= std::numeric_limits<int16_t>::min() &&
1581 constant <= std::numeric_limits<int16_t>::max()) {
1582 return hint;
1583 }
1584 } else if (hint == MachineType::Uint16()) {
1585 if (constant >= std::numeric_limits<uint16_t>::min() &&
1586 constant <= std::numeric_limits<uint16_t>::max()) {
1587 return hint;
1588 }
1589 } else if (hint == MachineType::Int32()) {
1590 return hint;
1591 } else if (hint == MachineType::Uint32()) {
1592 if (constant >= 0) return hint;
1593 }
1594 }
1595 }
1596 return node->opcode() == IrOpcode::kLoad ||
1597 node->opcode() == IrOpcode::kLoadImmutable
1598 ? LoadRepresentationOf(node->op())
1599 : MachineType::None();
1600 }
1601
1602 // Tries to match the size of the given opcode to that of the operands, if
1603 // possible.
TryNarrowOpcodeSize(InstructionCode opcode,Node * left,Node * right,FlagsContinuation * cont)1604 InstructionCode TryNarrowOpcodeSize(InstructionCode opcode, Node* left,
1605 Node* right, FlagsContinuation* cont) {
1606 // TODO(epertoso): we can probably get some size information out of phi nodes.
1607 // If the load representations don't match, both operands will be
1608 // zero/sign-extended to 32bit.
1609 MachineType left_type = MachineTypeForNarrow(left, right);
1610 MachineType right_type = MachineTypeForNarrow(right, left);
1611 if (left_type == right_type) {
1612 switch (left_type.representation()) {
1613 case MachineRepresentation::kBit:
1614 case MachineRepresentation::kWord8: {
1615 if (opcode == kIA32Test) return kIA32Test8;
1616 if (opcode == kIA32Cmp) {
1617 if (left_type.semantic() == MachineSemantic::kUint32) {
1618 cont->OverwriteUnsignedIfSigned();
1619 } else {
1620 CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
1621 }
1622 return kIA32Cmp8;
1623 }
1624 break;
1625 }
1626 case MachineRepresentation::kWord16:
1627 if (opcode == kIA32Test) return kIA32Test16;
1628 if (opcode == kIA32Cmp) {
1629 if (left_type.semantic() == MachineSemantic::kUint32) {
1630 cont->OverwriteUnsignedIfSigned();
1631 } else {
1632 CHECK_EQ(MachineSemantic::kInt32, left_type.semantic());
1633 }
1634 return kIA32Cmp16;
1635 }
1636 break;
1637 default:
1638 break;
1639 }
1640 }
1641 return opcode;
1642 }
1643
1644 // Shared routine for multiple float32 compare operations (inputs commuted).
VisitFloat32Compare(InstructionSelector * selector,Node * node,FlagsContinuation * cont)1645 void VisitFloat32Compare(InstructionSelector* selector, Node* node,
1646 FlagsContinuation* cont) {
1647 Node* const left = node->InputAt(0);
1648 Node* const right = node->InputAt(1);
1649 VisitCompare(selector, kIA32Float32Cmp, right, left, cont, false);
1650 }
1651
1652 // Shared routine for multiple float64 compare operations (inputs commuted).
VisitFloat64Compare(InstructionSelector * selector,Node * node,FlagsContinuation * cont)1653 void VisitFloat64Compare(InstructionSelector* selector, Node* node,
1654 FlagsContinuation* cont) {
1655 Node* const left = node->InputAt(0);
1656 Node* const right = node->InputAt(1);
1657 VisitCompare(selector, kIA32Float64Cmp, right, left, cont, false);
1658 }
1659
1660 // Shared routine for multiple word compare operations.
VisitWordCompare(InstructionSelector * selector,Node * node,InstructionCode opcode,FlagsContinuation * cont)1661 void VisitWordCompare(InstructionSelector* selector, Node* node,
1662 InstructionCode opcode, FlagsContinuation* cont) {
1663 IA32OperandGenerator g(selector);
1664 Node* left = node->InputAt(0);
1665 Node* right = node->InputAt(1);
1666
1667 InstructionCode narrowed_opcode =
1668 TryNarrowOpcodeSize(opcode, left, right, cont);
1669
1670 int effect_level = selector->GetEffectLevel(node, cont);
1671
1672 // If one of the two inputs is an immediate, make sure it's on the right, or
1673 // if one of the two inputs is a memory operand, make sure it's on the left.
1674 if ((!g.CanBeImmediate(right) && g.CanBeImmediate(left)) ||
1675 (g.CanBeMemoryOperand(narrowed_opcode, node, right, effect_level) &&
1676 !g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level))) {
1677 if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute();
1678 std::swap(left, right);
1679 }
1680
1681 // Match immediates on right side of comparison.
1682 if (g.CanBeImmediate(right)) {
1683 if (g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level)) {
1684 return VisitCompareWithMemoryOperand(selector, narrowed_opcode, left,
1685 g.UseImmediate(right), cont);
1686 }
1687 return VisitCompare(selector, opcode, g.Use(left), g.UseImmediate(right),
1688 cont);
1689 }
1690
1691 // Match memory operands on left side of comparison.
1692 if (g.CanBeMemoryOperand(narrowed_opcode, node, left, effect_level)) {
1693 bool needs_byte_register =
1694 narrowed_opcode == kIA32Test8 || narrowed_opcode == kIA32Cmp8;
1695 return VisitCompareWithMemoryOperand(
1696 selector, narrowed_opcode, left,
1697 needs_byte_register ? g.UseByteRegister(right) : g.UseRegister(right),
1698 cont);
1699 }
1700
1701 return VisitCompare(selector, opcode, left, right, cont,
1702 node->op()->HasProperty(Operator::kCommutative));
1703 }
1704
VisitWordCompare(InstructionSelector * selector,Node * node,FlagsContinuation * cont)1705 void VisitWordCompare(InstructionSelector* selector, Node* node,
1706 FlagsContinuation* cont) {
1707 VisitWordCompare(selector, node, kIA32Cmp, cont);
1708 }
1709
VisitAtomicBinOp(InstructionSelector * selector,Node * node,ArchOpcode opcode,MachineRepresentation rep)1710 void VisitAtomicBinOp(InstructionSelector* selector, Node* node,
1711 ArchOpcode opcode, MachineRepresentation rep) {
1712 AddressingMode addressing_mode;
1713 IA32OperandGenerator g(selector);
1714 Node* base = node->InputAt(0);
1715 Node* index = node->InputAt(1);
1716 Node* value = node->InputAt(2);
1717 InstructionOperand inputs[] = {
1718 g.UseUniqueRegister(value), g.UseUniqueRegister(base),
1719 g.GetEffectiveIndexOperand(index, &addressing_mode)};
1720 InstructionOperand outputs[] = {g.DefineAsFixed(node, eax)};
1721 InstructionOperand temp[] = {(rep == MachineRepresentation::kWord8)
1722 ? g.UseByteRegister(node)
1723 : g.TempRegister()};
1724 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
1725 selector->Emit(code, arraysize(outputs), outputs, arraysize(inputs), inputs,
1726 arraysize(temp), temp);
1727 }
1728
VisitPairAtomicBinOp(InstructionSelector * selector,Node * node,ArchOpcode opcode)1729 void VisitPairAtomicBinOp(InstructionSelector* selector, Node* node,
1730 ArchOpcode opcode) {
1731 IA32OperandGenerator g(selector);
1732 Node* base = node->InputAt(0);
1733 Node* index = node->InputAt(1);
1734 Node* value = node->InputAt(2);
1735 // For Word64 operations, the value input is split into the a high node,
1736 // and a low node in the int64-lowering phase.
1737 Node* value_high = node->InputAt(3);
1738
1739 // Wasm lives in 32-bit address space, so we do not need to worry about
1740 // base/index lowering. This will need to be fixed for Wasm64.
1741 AddressingMode addressing_mode;
1742 InstructionOperand inputs[] = {
1743 g.UseUniqueRegisterOrSlotOrConstant(value), g.UseFixed(value_high, ecx),
1744 g.UseUniqueRegister(base),
1745 g.GetEffectiveIndexOperand(index, &addressing_mode)};
1746 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
1747 Node* projection0 = NodeProperties::FindProjection(node, 0);
1748 Node* projection1 = NodeProperties::FindProjection(node, 1);
1749 InstructionOperand outputs[2];
1750 size_t output_count = 0;
1751 InstructionOperand temps[2];
1752 size_t temp_count = 0;
1753 if (projection0) {
1754 outputs[output_count++] = g.DefineAsFixed(projection0, eax);
1755 } else {
1756 temps[temp_count++] = g.TempRegister(eax);
1757 }
1758 if (projection1) {
1759 outputs[output_count++] = g.DefineAsFixed(projection1, edx);
1760 } else {
1761 temps[temp_count++] = g.TempRegister(edx);
1762 }
1763 selector->Emit(code, output_count, outputs, arraysize(inputs), inputs,
1764 temp_count, temps);
1765 }
1766
1767 } // namespace
1768
1769 // Shared routine for word comparison with zero.
VisitWordCompareZero(Node * user,Node * value,FlagsContinuation * cont)1770 void InstructionSelector::VisitWordCompareZero(Node* user, Node* value,
1771 FlagsContinuation* cont) {
1772 // Try to combine with comparisons against 0 by simply inverting the branch.
1773 while (value->opcode() == IrOpcode::kWord32Equal && CanCover(user, value)) {
1774 Int32BinopMatcher m(value);
1775 if (!m.right().Is(0)) break;
1776
1777 user = value;
1778 value = m.left().node();
1779 cont->Negate();
1780 }
1781
1782 if (CanCover(user, value)) {
1783 switch (value->opcode()) {
1784 case IrOpcode::kWord32Equal:
1785 cont->OverwriteAndNegateIfEqual(kEqual);
1786 return VisitWordCompare(this, value, cont);
1787 case IrOpcode::kInt32LessThan:
1788 cont->OverwriteAndNegateIfEqual(kSignedLessThan);
1789 return VisitWordCompare(this, value, cont);
1790 case IrOpcode::kInt32LessThanOrEqual:
1791 cont->OverwriteAndNegateIfEqual(kSignedLessThanOrEqual);
1792 return VisitWordCompare(this, value, cont);
1793 case IrOpcode::kUint32LessThan:
1794 cont->OverwriteAndNegateIfEqual(kUnsignedLessThan);
1795 return VisitWordCompare(this, value, cont);
1796 case IrOpcode::kUint32LessThanOrEqual:
1797 cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
1798 return VisitWordCompare(this, value, cont);
1799 case IrOpcode::kFloat32Equal:
1800 cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
1801 return VisitFloat32Compare(this, value, cont);
1802 case IrOpcode::kFloat32LessThan:
1803 cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
1804 return VisitFloat32Compare(this, value, cont);
1805 case IrOpcode::kFloat32LessThanOrEqual:
1806 cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
1807 return VisitFloat32Compare(this, value, cont);
1808 case IrOpcode::kFloat64Equal:
1809 cont->OverwriteAndNegateIfEqual(kUnorderedEqual);
1810 return VisitFloat64Compare(this, value, cont);
1811 case IrOpcode::kFloat64LessThan:
1812 cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThan);
1813 return VisitFloat64Compare(this, value, cont);
1814 case IrOpcode::kFloat64LessThanOrEqual:
1815 cont->OverwriteAndNegateIfEqual(kUnsignedGreaterThanOrEqual);
1816 return VisitFloat64Compare(this, value, cont);
1817 case IrOpcode::kProjection:
1818 // Check if this is the overflow output projection of an
1819 // <Operation>WithOverflow node.
1820 if (ProjectionIndexOf(value->op()) == 1u) {
1821 // We cannot combine the <Operation>WithOverflow with this branch
1822 // unless the 0th projection (the use of the actual value of the
1823 // <Operation> is either nullptr, which means there's no use of the
1824 // actual value, or was already defined, which means it is scheduled
1825 // *AFTER* this branch).
1826 Node* const node = value->InputAt(0);
1827 Node* const result = NodeProperties::FindProjection(node, 0);
1828 if (result == nullptr || IsDefined(result)) {
1829 switch (node->opcode()) {
1830 case IrOpcode::kInt32AddWithOverflow:
1831 cont->OverwriteAndNegateIfEqual(kOverflow);
1832 return VisitBinop(this, node, kIA32Add, cont);
1833 case IrOpcode::kInt32SubWithOverflow:
1834 cont->OverwriteAndNegateIfEqual(kOverflow);
1835 return VisitBinop(this, node, kIA32Sub, cont);
1836 case IrOpcode::kInt32MulWithOverflow:
1837 cont->OverwriteAndNegateIfEqual(kOverflow);
1838 return VisitBinop(this, node, kIA32Imul, cont);
1839 default:
1840 break;
1841 }
1842 }
1843 }
1844 break;
1845 case IrOpcode::kInt32Sub:
1846 return VisitWordCompare(this, value, cont);
1847 case IrOpcode::kWord32And:
1848 return VisitWordCompare(this, value, kIA32Test, cont);
1849 case IrOpcode::kStackPointerGreaterThan:
1850 cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition);
1851 return VisitStackPointerGreaterThan(value, cont);
1852 default:
1853 break;
1854 }
1855 }
1856
1857 // Continuation could not be combined with a compare, emit compare against 0.
1858 IA32OperandGenerator g(this);
1859 VisitCompare(this, kIA32Cmp, g.Use(value), g.TempImmediate(0), cont);
1860 }
1861
VisitSwitch(Node * node,const SwitchInfo & sw)1862 void InstructionSelector::VisitSwitch(Node* node, const SwitchInfo& sw) {
1863 IA32OperandGenerator g(this);
1864 InstructionOperand value_operand = g.UseRegister(node->InputAt(0));
1865
1866 // Emit either ArchTableSwitch or ArchBinarySearchSwitch.
1867 if (enable_switch_jump_table_ == kEnableSwitchJumpTable) {
1868 static const size_t kMaxTableSwitchValueRange = 2 << 16;
1869 size_t table_space_cost = 4 + sw.value_range();
1870 size_t table_time_cost = 3;
1871 size_t lookup_space_cost = 3 + 2 * sw.case_count();
1872 size_t lookup_time_cost = sw.case_count();
1873 if (sw.case_count() > 4 &&
1874 table_space_cost + 3 * table_time_cost <=
1875 lookup_space_cost + 3 * lookup_time_cost &&
1876 sw.min_value() > std::numeric_limits<int32_t>::min() &&
1877 sw.value_range() <= kMaxTableSwitchValueRange) {
1878 InstructionOperand index_operand = value_operand;
1879 if (sw.min_value()) {
1880 index_operand = g.TempRegister();
1881 Emit(kIA32Lea | AddressingModeField::encode(kMode_MRI), index_operand,
1882 value_operand, g.TempImmediate(-sw.min_value()));
1883 }
1884 // Generate a table lookup.
1885 return EmitTableSwitch(sw, index_operand);
1886 }
1887 }
1888
1889 // Generate a tree of conditional jumps.
1890 return EmitBinarySearchSwitch(sw, value_operand);
1891 }
1892
VisitWord32Equal(Node * const node)1893 void InstructionSelector::VisitWord32Equal(Node* const node) {
1894 FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
1895 Int32BinopMatcher m(node);
1896 if (m.right().Is(0)) {
1897 return VisitWordCompareZero(m.node(), m.left().node(), &cont);
1898 }
1899 VisitWordCompare(this, node, &cont);
1900 }
1901
VisitInt32LessThan(Node * node)1902 void InstructionSelector::VisitInt32LessThan(Node* node) {
1903 FlagsContinuation cont = FlagsContinuation::ForSet(kSignedLessThan, node);
1904 VisitWordCompare(this, node, &cont);
1905 }
1906
VisitInt32LessThanOrEqual(Node * node)1907 void InstructionSelector::VisitInt32LessThanOrEqual(Node* node) {
1908 FlagsContinuation cont =
1909 FlagsContinuation::ForSet(kSignedLessThanOrEqual, node);
1910 VisitWordCompare(this, node, &cont);
1911 }
1912
VisitUint32LessThan(Node * node)1913 void InstructionSelector::VisitUint32LessThan(Node* node) {
1914 FlagsContinuation cont = FlagsContinuation::ForSet(kUnsignedLessThan, node);
1915 VisitWordCompare(this, node, &cont);
1916 }
1917
VisitUint32LessThanOrEqual(Node * node)1918 void InstructionSelector::VisitUint32LessThanOrEqual(Node* node) {
1919 FlagsContinuation cont =
1920 FlagsContinuation::ForSet(kUnsignedLessThanOrEqual, node);
1921 VisitWordCompare(this, node, &cont);
1922 }
1923
VisitInt32AddWithOverflow(Node * node)1924 void InstructionSelector::VisitInt32AddWithOverflow(Node* node) {
1925 if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
1926 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
1927 return VisitBinop(this, node, kIA32Add, &cont);
1928 }
1929 FlagsContinuation cont;
1930 VisitBinop(this, node, kIA32Add, &cont);
1931 }
1932
VisitInt32SubWithOverflow(Node * node)1933 void InstructionSelector::VisitInt32SubWithOverflow(Node* node) {
1934 if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
1935 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
1936 return VisitBinop(this, node, kIA32Sub, &cont);
1937 }
1938 FlagsContinuation cont;
1939 VisitBinop(this, node, kIA32Sub, &cont);
1940 }
1941
VisitInt32MulWithOverflow(Node * node)1942 void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
1943 if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
1944 FlagsContinuation cont = FlagsContinuation::ForSet(kOverflow, ovf);
1945 return VisitBinop(this, node, kIA32Imul, &cont);
1946 }
1947 FlagsContinuation cont;
1948 VisitBinop(this, node, kIA32Imul, &cont);
1949 }
1950
VisitFloat32Equal(Node * node)1951 void InstructionSelector::VisitFloat32Equal(Node* node) {
1952 FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
1953 VisitFloat32Compare(this, node, &cont);
1954 }
1955
VisitFloat32LessThan(Node * node)1956 void InstructionSelector::VisitFloat32LessThan(Node* node) {
1957 FlagsContinuation cont =
1958 FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
1959 VisitFloat32Compare(this, node, &cont);
1960 }
1961
VisitFloat32LessThanOrEqual(Node * node)1962 void InstructionSelector::VisitFloat32LessThanOrEqual(Node* node) {
1963 FlagsContinuation cont =
1964 FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
1965 VisitFloat32Compare(this, node, &cont);
1966 }
1967
VisitFloat64Equal(Node * node)1968 void InstructionSelector::VisitFloat64Equal(Node* node) {
1969 FlagsContinuation cont = FlagsContinuation::ForSet(kUnorderedEqual, node);
1970 VisitFloat64Compare(this, node, &cont);
1971 }
1972
VisitFloat64LessThan(Node * node)1973 void InstructionSelector::VisitFloat64LessThan(Node* node) {
1974 FlagsContinuation cont =
1975 FlagsContinuation::ForSet(kUnsignedGreaterThan, node);
1976 VisitFloat64Compare(this, node, &cont);
1977 }
1978
VisitFloat64LessThanOrEqual(Node * node)1979 void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) {
1980 FlagsContinuation cont =
1981 FlagsContinuation::ForSet(kUnsignedGreaterThanOrEqual, node);
1982 VisitFloat64Compare(this, node, &cont);
1983 }
1984
VisitFloat64InsertLowWord32(Node * node)1985 void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
1986 IA32OperandGenerator g(this);
1987 Node* left = node->InputAt(0);
1988 Node* right = node->InputAt(1);
1989 Float64Matcher mleft(left);
1990 if (mleft.HasResolvedValue() &&
1991 (bit_cast<uint64_t>(mleft.ResolvedValue()) >> 32) == 0u) {
1992 Emit(kIA32Float64LoadLowWord32, g.DefineAsRegister(node), g.Use(right));
1993 return;
1994 }
1995 Emit(kIA32Float64InsertLowWord32, g.DefineSameAsFirst(node),
1996 g.UseRegister(left), g.Use(right));
1997 }
1998
VisitFloat64InsertHighWord32(Node * node)1999 void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
2000 IA32OperandGenerator g(this);
2001 Node* left = node->InputAt(0);
2002 Node* right = node->InputAt(1);
2003 Emit(kIA32Float64InsertHighWord32, g.DefineSameAsFirst(node),
2004 g.UseRegister(left), g.Use(right));
2005 }
2006
VisitFloat64SilenceNaN(Node * node)2007 void InstructionSelector::VisitFloat64SilenceNaN(Node* node) {
2008 IA32OperandGenerator g(this);
2009 Emit(kIA32Float64SilenceNaN, g.DefineSameAsFirst(node),
2010 g.UseRegister(node->InputAt(0)));
2011 }
2012
VisitMemoryBarrier(Node * node)2013 void InstructionSelector::VisitMemoryBarrier(Node* node) {
2014 IA32OperandGenerator g(this);
2015 Emit(kIA32MFence, g.NoOutput());
2016 }
2017
VisitWord32AtomicLoad(Node * node)2018 void InstructionSelector::VisitWord32AtomicLoad(Node* node) {
2019 AtomicLoadParameters atomic_load_params = AtomicLoadParametersOf(node->op());
2020 LoadRepresentation load_rep = atomic_load_params.representation();
2021 DCHECK(load_rep.representation() == MachineRepresentation::kWord8 ||
2022 load_rep.representation() == MachineRepresentation::kWord16 ||
2023 load_rep.representation() == MachineRepresentation::kWord32 ||
2024 load_rep.representation() == MachineRepresentation::kTaggedSigned ||
2025 load_rep.representation() == MachineRepresentation::kTaggedPointer ||
2026 load_rep.representation() == MachineRepresentation::kTagged);
2027 USE(load_rep);
2028 // The memory order is ignored as both acquire and sequentially consistent
2029 // loads can emit MOV.
2030 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
2031 VisitLoad(node, node, GetLoadOpcode(load_rep));
2032 }
2033
VisitWord32AtomicStore(Node * node)2034 void InstructionSelector::VisitWord32AtomicStore(Node* node) {
2035 AtomicStoreParameters store_params = AtomicStoreParametersOf(node->op());
2036 VisitStoreCommon(this, node, store_params.store_representation(),
2037 store_params.order());
2038 }
2039
VisitWord32AtomicExchange(Node * node)2040 void InstructionSelector::VisitWord32AtomicExchange(Node* node) {
2041 IA32OperandGenerator g(this);
2042 MachineType type = AtomicOpType(node->op());
2043 ArchOpcode opcode;
2044 if (type == MachineType::Int8()) {
2045 opcode = kAtomicExchangeInt8;
2046 } else if (type == MachineType::Uint8()) {
2047 opcode = kAtomicExchangeUint8;
2048 } else if (type == MachineType::Int16()) {
2049 opcode = kAtomicExchangeInt16;
2050 } else if (type == MachineType::Uint16()) {
2051 opcode = kAtomicExchangeUint16;
2052 } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
2053 opcode = kAtomicExchangeWord32;
2054 } else {
2055 UNREACHABLE();
2056 }
2057 VisitAtomicExchange(this, node, opcode, type.representation());
2058 }
2059
VisitWord32AtomicCompareExchange(Node * node)2060 void InstructionSelector::VisitWord32AtomicCompareExchange(Node* node) {
2061 IA32OperandGenerator g(this);
2062 Node* base = node->InputAt(0);
2063 Node* index = node->InputAt(1);
2064 Node* old_value = node->InputAt(2);
2065 Node* new_value = node->InputAt(3);
2066
2067 MachineType type = AtomicOpType(node->op());
2068 ArchOpcode opcode;
2069 if (type == MachineType::Int8()) {
2070 opcode = kAtomicCompareExchangeInt8;
2071 } else if (type == MachineType::Uint8()) {
2072 opcode = kAtomicCompareExchangeUint8;
2073 } else if (type == MachineType::Int16()) {
2074 opcode = kAtomicCompareExchangeInt16;
2075 } else if (type == MachineType::Uint16()) {
2076 opcode = kAtomicCompareExchangeUint16;
2077 } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
2078 opcode = kAtomicCompareExchangeWord32;
2079 } else {
2080 UNREACHABLE();
2081 }
2082 AddressingMode addressing_mode;
2083 InstructionOperand new_val_operand =
2084 (type.representation() == MachineRepresentation::kWord8)
2085 ? g.UseByteRegister(new_value)
2086 : g.UseUniqueRegister(new_value);
2087 InstructionOperand inputs[] = {
2088 g.UseFixed(old_value, eax), new_val_operand, g.UseUniqueRegister(base),
2089 g.GetEffectiveIndexOperand(index, &addressing_mode)};
2090 InstructionOperand outputs[] = {g.DefineAsFixed(node, eax)};
2091 InstructionCode code = opcode | AddressingModeField::encode(addressing_mode);
2092 Emit(code, 1, outputs, arraysize(inputs), inputs);
2093 }
2094
VisitWord32AtomicBinaryOperation(Node * node,ArchOpcode int8_op,ArchOpcode uint8_op,ArchOpcode int16_op,ArchOpcode uint16_op,ArchOpcode word32_op)2095 void InstructionSelector::VisitWord32AtomicBinaryOperation(
2096 Node* node, ArchOpcode int8_op, ArchOpcode uint8_op, ArchOpcode int16_op,
2097 ArchOpcode uint16_op, ArchOpcode word32_op) {
2098 MachineType type = AtomicOpType(node->op());
2099 ArchOpcode opcode;
2100 if (type == MachineType::Int8()) {
2101 opcode = int8_op;
2102 } else if (type == MachineType::Uint8()) {
2103 opcode = uint8_op;
2104 } else if (type == MachineType::Int16()) {
2105 opcode = int16_op;
2106 } else if (type == MachineType::Uint16()) {
2107 opcode = uint16_op;
2108 } else if (type == MachineType::Int32() || type == MachineType::Uint32()) {
2109 opcode = word32_op;
2110 } else {
2111 UNREACHABLE();
2112 }
2113 VisitAtomicBinOp(this, node, opcode, type.representation());
2114 }
2115
2116 #define VISIT_ATOMIC_BINOP(op) \
2117 void InstructionSelector::VisitWord32Atomic##op(Node* node) { \
2118 VisitWord32AtomicBinaryOperation( \
2119 node, kAtomic##op##Int8, kAtomic##op##Uint8, kAtomic##op##Int16, \
2120 kAtomic##op##Uint16, kAtomic##op##Word32); \
2121 }
2122 VISIT_ATOMIC_BINOP(Add)
VISIT_ATOMIC_BINOP(Sub)2123 VISIT_ATOMIC_BINOP(Sub)
2124 VISIT_ATOMIC_BINOP(And)
2125 VISIT_ATOMIC_BINOP(Or)
2126 VISIT_ATOMIC_BINOP(Xor)
2127 #undef VISIT_ATOMIC_BINOP
2128
2129 void InstructionSelector::VisitWord32AtomicPairLoad(Node* node) {
2130 // Both acquire and sequentially consistent loads can emit MOV.
2131 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
2132 IA32OperandGenerator g(this);
2133 AddressingMode mode;
2134 Node* base = node->InputAt(0);
2135 Node* index = node->InputAt(1);
2136 Node* projection0 = NodeProperties::FindProjection(node, 0);
2137 Node* projection1 = NodeProperties::FindProjection(node, 1);
2138 if (projection0 && projection1) {
2139 InstructionOperand inputs[] = {g.UseUniqueRegister(base),
2140 g.GetEffectiveIndexOperand(index, &mode)};
2141 InstructionCode code =
2142 kIA32Word32AtomicPairLoad | AddressingModeField::encode(mode);
2143 InstructionOperand outputs[] = {g.DefineAsRegister(projection0),
2144 g.DefineAsRegister(projection1)};
2145 Emit(code, 2, outputs, 2, inputs);
2146 } else if (projection0 || projection1) {
2147 // Only one word is needed, so it's enough to load just that.
2148 ArchOpcode opcode = kIA32Movl;
2149
2150 InstructionOperand outputs[] = {
2151 g.DefineAsRegister(projection0 ? projection0 : projection1)};
2152 InstructionOperand inputs[3];
2153 size_t input_count = 0;
2154 // TODO(ahaas): Introduce an enum for {scale} instead of an integer.
2155 // {scale = 0} means *1 in the generated code.
2156 int scale = 0;
2157 AddressingMode mode = g.GenerateMemoryOperandInputs(
2158 index, scale, base, projection0 ? 0 : 4, kPositiveDisplacement, inputs,
2159 &input_count);
2160 InstructionCode code = opcode | AddressingModeField::encode(mode);
2161 Emit(code, 1, outputs, input_count, inputs);
2162 }
2163 }
2164
VisitWord32AtomicPairStore(Node * node)2165 void InstructionSelector::VisitWord32AtomicPairStore(Node* node) {
2166 // Release pair stores emit a MOVQ via a double register, and sequentially
2167 // consistent stores emit CMPXCHG8B.
2168 // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
2169
2170 IA32OperandGenerator g(this);
2171 Node* base = node->InputAt(0);
2172 Node* index = node->InputAt(1);
2173 Node* value = node->InputAt(2);
2174 Node* value_high = node->InputAt(3);
2175
2176 AtomicMemoryOrder order = OpParameter<AtomicMemoryOrder>(node->op());
2177 if (order == AtomicMemoryOrder::kAcqRel) {
2178 AddressingMode addressing_mode;
2179 InstructionOperand inputs[] = {
2180 g.UseUniqueRegisterOrSlotOrConstant(value),
2181 g.UseUniqueRegisterOrSlotOrConstant(value_high),
2182 g.UseUniqueRegister(base),
2183 g.GetEffectiveIndexOperand(index, &addressing_mode),
2184 };
2185 InstructionCode code = kIA32Word32ReleasePairStore |
2186 AddressingModeField::encode(addressing_mode);
2187 Emit(code, 0, nullptr, arraysize(inputs), inputs);
2188 } else {
2189 DCHECK_EQ(order, AtomicMemoryOrder::kSeqCst);
2190
2191 AddressingMode addressing_mode;
2192 InstructionOperand inputs[] = {
2193 g.UseUniqueRegisterOrSlotOrConstant(value), g.UseFixed(value_high, ecx),
2194 g.UseUniqueRegister(base),
2195 g.GetEffectiveIndexOperand(index, &addressing_mode)};
2196 // Allocating temp registers here as stores are performed using an atomic
2197 // exchange, the output of which is stored in edx:eax, which should be saved
2198 // and restored at the end of the instruction.
2199 InstructionOperand temps[] = {g.TempRegister(eax), g.TempRegister(edx)};
2200 const int num_temps = arraysize(temps);
2201 InstructionCode code = kIA32Word32SeqCstPairStore |
2202 AddressingModeField::encode(addressing_mode);
2203 Emit(code, 0, nullptr, arraysize(inputs), inputs, num_temps, temps);
2204 }
2205 }
2206
VisitWord32AtomicPairAdd(Node * node)2207 void InstructionSelector::VisitWord32AtomicPairAdd(Node* node) {
2208 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairAdd);
2209 }
2210
VisitWord32AtomicPairSub(Node * node)2211 void InstructionSelector::VisitWord32AtomicPairSub(Node* node) {
2212 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairSub);
2213 }
2214
VisitWord32AtomicPairAnd(Node * node)2215 void InstructionSelector::VisitWord32AtomicPairAnd(Node* node) {
2216 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairAnd);
2217 }
2218
VisitWord32AtomicPairOr(Node * node)2219 void InstructionSelector::VisitWord32AtomicPairOr(Node* node) {
2220 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairOr);
2221 }
2222
VisitWord32AtomicPairXor(Node * node)2223 void InstructionSelector::VisitWord32AtomicPairXor(Node* node) {
2224 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairXor);
2225 }
2226
VisitWord32AtomicPairExchange(Node * node)2227 void InstructionSelector::VisitWord32AtomicPairExchange(Node* node) {
2228 VisitPairAtomicBinOp(this, node, kIA32Word32AtomicPairExchange);
2229 }
2230
VisitWord32AtomicPairCompareExchange(Node * node)2231 void InstructionSelector::VisitWord32AtomicPairCompareExchange(Node* node) {
2232 IA32OperandGenerator g(this);
2233 Node* index = node->InputAt(1);
2234 AddressingMode addressing_mode;
2235
2236 InstructionOperand inputs[] = {
2237 // High, Low values of old value
2238 g.UseFixed(node->InputAt(2), eax), g.UseFixed(node->InputAt(3), edx),
2239 // High, Low values of new value
2240 g.UseUniqueRegisterOrSlotOrConstant(node->InputAt(4)),
2241 g.UseFixed(node->InputAt(5), ecx),
2242 // InputAt(0) => base
2243 g.UseUniqueRegister(node->InputAt(0)),
2244 g.GetEffectiveIndexOperand(index, &addressing_mode)};
2245 Node* projection0 = NodeProperties::FindProjection(node, 0);
2246 Node* projection1 = NodeProperties::FindProjection(node, 1);
2247 InstructionCode code = kIA32Word32AtomicPairCompareExchange |
2248 AddressingModeField::encode(addressing_mode);
2249
2250 InstructionOperand outputs[2];
2251 size_t output_count = 0;
2252 InstructionOperand temps[2];
2253 size_t temp_count = 0;
2254 if (projection0) {
2255 outputs[output_count++] = g.DefineAsFixed(projection0, eax);
2256 } else {
2257 temps[temp_count++] = g.TempRegister(eax);
2258 }
2259 if (projection1) {
2260 outputs[output_count++] = g.DefineAsFixed(projection1, edx);
2261 } else {
2262 temps[temp_count++] = g.TempRegister(edx);
2263 }
2264 Emit(code, output_count, outputs, arraysize(inputs), inputs, temp_count,
2265 temps);
2266 }
2267
2268 #define SIMD_INT_TYPES(V) \
2269 V(I32x4) \
2270 V(I16x8) \
2271 V(I8x16)
2272
2273 #define SIMD_BINOP_LIST(V) \
2274 V(I32x4GtU) \
2275 V(I32x4GeU) \
2276 V(I16x8Ne) \
2277 V(I16x8GeS) \
2278 V(I16x8GtU) \
2279 V(I16x8GeU) \
2280 V(I8x16Ne) \
2281 V(I8x16GeS) \
2282 V(I8x16GtU) \
2283 V(I8x16GeU)
2284
2285 #define SIMD_BINOP_UNIFIED_SSE_AVX_LIST(V) \
2286 V(F32x4Add) \
2287 V(F32x4Sub) \
2288 V(F32x4Mul) \
2289 V(F32x4Div) \
2290 V(F32x4Eq) \
2291 V(F32x4Ne) \
2292 V(F32x4Lt) \
2293 V(F32x4Le) \
2294 V(F32x4Min) \
2295 V(F32x4Max) \
2296 V(I64x2Add) \
2297 V(I64x2Sub) \
2298 V(I64x2Eq) \
2299 V(I64x2Ne) \
2300 V(I32x4Add) \
2301 V(I32x4Sub) \
2302 V(I32x4Mul) \
2303 V(I32x4MinS) \
2304 V(I32x4MaxS) \
2305 V(I32x4Eq) \
2306 V(I32x4Ne) \
2307 V(I32x4GtS) \
2308 V(I32x4GeS) \
2309 V(I32x4MinU) \
2310 V(I32x4MaxU) \
2311 V(I32x4DotI16x8S) \
2312 V(I16x8Add) \
2313 V(I16x8AddSatS) \
2314 V(I16x8Sub) \
2315 V(I16x8SubSatS) \
2316 V(I16x8Mul) \
2317 V(I16x8Eq) \
2318 V(I16x8GtS) \
2319 V(I16x8MinS) \
2320 V(I16x8MaxS) \
2321 V(I16x8AddSatU) \
2322 V(I16x8SubSatU) \
2323 V(I16x8MinU) \
2324 V(I16x8MaxU) \
2325 V(I16x8SConvertI32x4) \
2326 V(I16x8UConvertI32x4) \
2327 V(I16x8RoundingAverageU) \
2328 V(I8x16Add) \
2329 V(I8x16AddSatS) \
2330 V(I8x16Sub) \
2331 V(I8x16SubSatS) \
2332 V(I8x16MinS) \
2333 V(I8x16MaxS) \
2334 V(I8x16Eq) \
2335 V(I8x16GtS) \
2336 V(I8x16AddSatU) \
2337 V(I8x16SubSatU) \
2338 V(I8x16MinU) \
2339 V(I8x16MaxU) \
2340 V(I8x16SConvertI16x8) \
2341 V(I8x16UConvertI16x8) \
2342 V(I8x16RoundingAverageU) \
2343 V(S128And) \
2344 V(S128Or) \
2345 V(S128Xor)
2346
2347 // These opcodes require all inputs to be registers because the codegen is
2348 // simpler with all registers.
2349 #define SIMD_BINOP_RRR(V) \
2350 V(I64x2ExtMulLowI32x4S) \
2351 V(I64x2ExtMulHighI32x4S) \
2352 V(I64x2ExtMulLowI32x4U) \
2353 V(I64x2ExtMulHighI32x4U) \
2354 V(I32x4ExtMulLowI16x8S) \
2355 V(I32x4ExtMulHighI16x8S) \
2356 V(I32x4ExtMulLowI16x8U) \
2357 V(I32x4ExtMulHighI16x8U) \
2358 V(I16x8ExtMulLowI8x16S) \
2359 V(I16x8ExtMulHighI8x16S) \
2360 V(I16x8ExtMulLowI8x16U) \
2361 V(I16x8ExtMulHighI8x16U) \
2362 V(I16x8Q15MulRSatS)
2363
2364 #define SIMD_UNOP_LIST(V) \
2365 V(F64x2ConvertLowI32x4S) \
2366 V(F32x4DemoteF64x2Zero) \
2367 V(F32x4Sqrt) \
2368 V(F32x4SConvertI32x4) \
2369 V(F32x4RecipApprox) \
2370 V(F32x4RecipSqrtApprox) \
2371 V(I64x2BitMask) \
2372 V(I64x2SConvertI32x4Low) \
2373 V(I64x2SConvertI32x4High) \
2374 V(I64x2UConvertI32x4Low) \
2375 V(I64x2UConvertI32x4High) \
2376 V(I32x4SConvertI16x8Low) \
2377 V(I32x4SConvertI16x8High) \
2378 V(I32x4Neg) \
2379 V(I32x4UConvertI16x8Low) \
2380 V(I32x4UConvertI16x8High) \
2381 V(I32x4Abs) \
2382 V(I32x4BitMask) \
2383 V(I16x8SConvertI8x16Low) \
2384 V(I16x8SConvertI8x16High) \
2385 V(I16x8Neg) \
2386 V(I16x8UConvertI8x16Low) \
2387 V(I16x8UConvertI8x16High) \
2388 V(I16x8Abs) \
2389 V(I8x16Neg) \
2390 V(I8x16Abs) \
2391 V(I8x16BitMask) \
2392 V(S128Not)
2393
2394 #define SIMD_ALLTRUE_LIST(V) \
2395 V(I64x2AllTrue) \
2396 V(I32x4AllTrue) \
2397 V(I16x8AllTrue) \
2398 V(I8x16AllTrue)
2399
2400 #define SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(V) \
2401 V(I64x2Shl) \
2402 V(I64x2ShrU) \
2403 V(I32x4Shl) \
2404 V(I32x4ShrS) \
2405 V(I32x4ShrU) \
2406 V(I16x8Shl) \
2407 V(I16x8ShrS) \
2408 V(I16x8ShrU)
2409
VisitS128Const(Node * node)2410 void InstructionSelector::VisitS128Const(Node* node) {
2411 IA32OperandGenerator g(this);
2412 static const int kUint32Immediates = kSimd128Size / sizeof(uint32_t);
2413 uint32_t val[kUint32Immediates];
2414 memcpy(val, S128ImmediateParameterOf(node->op()).data(), kSimd128Size);
2415 // If all bytes are zeros or ones, avoid emitting code for generic constants
2416 bool all_zeros = !(val[0] || val[1] || val[2] || val[3]);
2417 bool all_ones = val[0] == UINT32_MAX && val[1] == UINT32_MAX &&
2418 val[2] == UINT32_MAX && val[3] == UINT32_MAX;
2419 InstructionOperand dst = g.DefineAsRegister(node);
2420 if (all_zeros) {
2421 Emit(kIA32S128Zero, dst);
2422 } else if (all_ones) {
2423 Emit(kIA32S128AllOnes, dst);
2424 } else {
2425 InstructionOperand inputs[kUint32Immediates];
2426 for (int i = 0; i < kUint32Immediates; ++i) {
2427 inputs[i] = g.UseImmediate(val[i]);
2428 }
2429 InstructionOperand temp(g.TempRegister());
2430 Emit(kIA32S128Const, 1, &dst, kUint32Immediates, inputs, 1, &temp);
2431 }
2432 }
2433
VisitF64x2Min(Node * node)2434 void InstructionSelector::VisitF64x2Min(Node* node) {
2435 IA32OperandGenerator g(this);
2436 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
2437 InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
2438
2439 if (IsSupported(AVX)) {
2440 Emit(kIA32F64x2Min, g.DefineAsRegister(node), operand0, operand1);
2441 } else {
2442 Emit(kIA32F64x2Min, g.DefineSameAsFirst(node), operand0, operand1);
2443 }
2444 }
2445
VisitF64x2Max(Node * node)2446 void InstructionSelector::VisitF64x2Max(Node* node) {
2447 IA32OperandGenerator g(this);
2448 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
2449 InstructionOperand operand1 = g.UseRegister(node->InputAt(1));
2450 if (IsSupported(AVX)) {
2451 Emit(kIA32F64x2Max, g.DefineAsRegister(node), operand0, operand1);
2452 } else {
2453 Emit(kIA32F64x2Max, g.DefineSameAsFirst(node), operand0, operand1);
2454 }
2455 }
2456
VisitF64x2Splat(Node * node)2457 void InstructionSelector::VisitF64x2Splat(Node* node) {
2458 VisitRRSimd(this, node, kIA32F64x2Splat);
2459 }
2460
VisitF64x2ExtractLane(Node * node)2461 void InstructionSelector::VisitF64x2ExtractLane(Node* node) {
2462 VisitRRISimd(this, node, kIA32F64x2ExtractLane, kIA32F64x2ExtractLane);
2463 }
2464
VisitI64x2SplatI32Pair(Node * node)2465 void InstructionSelector::VisitI64x2SplatI32Pair(Node* node) {
2466 IA32OperandGenerator g(this);
2467 Int32Matcher match_left(node->InputAt(0));
2468 Int32Matcher match_right(node->InputAt(1));
2469 if (match_left.Is(0) && match_right.Is(0)) {
2470 Emit(kIA32S128Zero, g.DefineAsRegister(node));
2471 } else {
2472 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
2473 InstructionOperand operand1 = g.Use(node->InputAt(1));
2474 Emit(kIA32I64x2SplatI32Pair, g.DefineAsRegister(node), operand0, operand1);
2475 }
2476 }
2477
VisitI64x2ReplaceLaneI32Pair(Node * node)2478 void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
2479 IA32OperandGenerator g(this);
2480 InstructionOperand operand = g.UseRegister(node->InputAt(0));
2481 InstructionOperand lane = g.UseImmediate(OpParameter<int32_t>(node->op()));
2482 InstructionOperand low = g.Use(node->InputAt(1));
2483 InstructionOperand high = g.Use(node->InputAt(2));
2484 Emit(kIA32I64x2ReplaceLaneI32Pair, g.DefineSameAsFirst(node), operand, lane,
2485 low, high);
2486 }
2487
VisitI64x2Neg(Node * node)2488 void InstructionSelector::VisitI64x2Neg(Node* node) {
2489 IA32OperandGenerator g(this);
2490 // If AVX unsupported, make sure dst != src to avoid a move.
2491 InstructionOperand operand0 = IsSupported(AVX)
2492 ? g.UseRegister(node->InputAt(0))
2493 : g.UseUnique(node->InputAt(0));
2494 Emit(kIA32I64x2Neg, g.DefineAsRegister(node), operand0);
2495 }
2496
VisitI64x2ShrS(Node * node)2497 void InstructionSelector::VisitI64x2ShrS(Node* node) {
2498 IA32OperandGenerator g(this);
2499 InstructionOperand dst =
2500 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2501
2502 if (g.CanBeImmediate(node->InputAt(1))) {
2503 Emit(kIA32I64x2ShrS, dst, g.UseRegister(node->InputAt(0)),
2504 g.UseImmediate(node->InputAt(1)));
2505 } else {
2506 InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
2507 Emit(kIA32I64x2ShrS, dst, g.UseUniqueRegister(node->InputAt(0)),
2508 g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
2509 }
2510 }
2511
VisitI64x2Mul(Node * node)2512 void InstructionSelector::VisitI64x2Mul(Node* node) {
2513 IA32OperandGenerator g(this);
2514 InstructionOperand temps[] = {g.TempSimd128Register(),
2515 g.TempSimd128Register()};
2516 Emit(kIA32I64x2Mul, g.DefineAsRegister(node),
2517 g.UseUniqueRegister(node->InputAt(0)),
2518 g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
2519 }
2520
VisitF32x4Splat(Node * node)2521 void InstructionSelector::VisitF32x4Splat(Node* node) {
2522 VisitRRSimd(this, node, kIA32F32x4Splat);
2523 }
2524
VisitF32x4ExtractLane(Node * node)2525 void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
2526 IA32OperandGenerator g(this);
2527 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
2528 InstructionOperand operand1 =
2529 g.UseImmediate(OpParameter<int32_t>(node->op()));
2530 Emit(kIA32F32x4ExtractLane, g.DefineAsRegister(node), operand0, operand1);
2531 }
2532
VisitF32x4UConvertI32x4(Node * node)2533 void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
2534 VisitRRSimd(this, node, kIA32F32x4UConvertI32x4);
2535 }
2536
VisitI32x4SConvertF32x4(Node * node)2537 void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
2538 IA32OperandGenerator g(this);
2539 InstructionOperand temps[] = {g.TempRegister()};
2540 InstructionOperand dst =
2541 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2542 Emit(kIA32I32x4SConvertF32x4, dst, g.UseRegister(node->InputAt(0)),
2543 arraysize(temps), temps);
2544 }
2545
VisitI32x4UConvertF32x4(Node * node)2546 void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
2547 IA32OperandGenerator g(this);
2548 InstructionOperand temps[] = {g.TempSimd128Register()};
2549 InstructionCode opcode =
2550 IsSupported(AVX) ? kAVXI32x4UConvertF32x4 : kSSEI32x4UConvertF32x4;
2551 Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
2552 arraysize(temps), temps);
2553 }
2554
VisitS128Zero(Node * node)2555 void InstructionSelector::VisitS128Zero(Node* node) {
2556 IA32OperandGenerator g(this);
2557 Emit(kIA32S128Zero, g.DefineAsRegister(node));
2558 }
2559
VisitS128Select(Node * node)2560 void InstructionSelector::VisitS128Select(Node* node) {
2561 IA32OperandGenerator g(this);
2562 InstructionOperand dst =
2563 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2564 Emit(kIA32S128Select, dst, g.UseRegister(node->InputAt(0)),
2565 g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2)));
2566 }
2567
VisitS128AndNot(Node * node)2568 void InstructionSelector::VisitS128AndNot(Node* node) {
2569 IA32OperandGenerator g(this);
2570 // andnps a b does ~a & b, but we want a & !b, so flip the input.
2571 InstructionOperand dst =
2572 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2573 Emit(kIA32S128AndNot, dst, g.UseRegister(node->InputAt(1)),
2574 g.UseRegister(node->InputAt(0)));
2575 }
2576
2577 #define VISIT_SIMD_SPLAT(Type) \
2578 void InstructionSelector::Visit##Type##Splat(Node* node) { \
2579 Int32Matcher int32_matcher(node->InputAt(0)); \
2580 if (int32_matcher.Is(0)) { \
2581 IA32OperandGenerator g(this); \
2582 Emit(kIA32S128Zero, g.DefineAsRegister(node)); \
2583 } else { \
2584 VisitRO(this, node, kIA32##Type##Splat); \
2585 } \
2586 }
SIMD_INT_TYPES(VISIT_SIMD_SPLAT)2587 SIMD_INT_TYPES(VISIT_SIMD_SPLAT)
2588 #undef SIMD_INT_TYPES
2589 #undef VISIT_SIMD_SPLAT
2590
2591 void InstructionSelector::VisitI8x16ExtractLaneU(Node* node) {
2592 VisitRRISimd(this, node, kIA32Pextrb);
2593 }
2594
VisitI8x16ExtractLaneS(Node * node)2595 void InstructionSelector::VisitI8x16ExtractLaneS(Node* node) {
2596 VisitRRISimd(this, node, kIA32I8x16ExtractLaneS);
2597 }
2598
VisitI16x8ExtractLaneU(Node * node)2599 void InstructionSelector::VisitI16x8ExtractLaneU(Node* node) {
2600 VisitRRISimd(this, node, kIA32Pextrw);
2601 }
2602
VisitI16x8ExtractLaneS(Node * node)2603 void InstructionSelector::VisitI16x8ExtractLaneS(Node* node) {
2604 VisitRRISimd(this, node, kIA32I16x8ExtractLaneS);
2605 }
2606
VisitI32x4ExtractLane(Node * node)2607 void InstructionSelector::VisitI32x4ExtractLane(Node* node) {
2608 VisitRRISimd(this, node, kIA32I32x4ExtractLane);
2609 }
2610
2611 #define SIMD_REPLACE_LANE_TYPE_OP(V) \
2612 V(I32x4, kIA32Pinsrd) \
2613 V(I16x8, kIA32Pinsrw) \
2614 V(I8x16, kIA32Pinsrb) \
2615 V(F32x4, kIA32Insertps)
2616
2617 #define VISIT_SIMD_REPLACE_LANE(TYPE, OPCODE) \
2618 void InstructionSelector::Visit##TYPE##ReplaceLane(Node* node) { \
2619 IA32OperandGenerator g(this); \
2620 InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
2621 InstructionOperand operand1 = \
2622 g.UseImmediate(OpParameter<int32_t>(node->op())); \
2623 InstructionOperand operand2 = g.Use(node->InputAt(1)); \
2624 InstructionOperand dst = IsSupported(AVX) ? g.DefineAsRegister(node) \
2625 : g.DefineSameAsFirst(node); \
2626 Emit(OPCODE, dst, operand0, operand1, operand2); \
2627 }
SIMD_REPLACE_LANE_TYPE_OP(VISIT_SIMD_REPLACE_LANE)2628 SIMD_REPLACE_LANE_TYPE_OP(VISIT_SIMD_REPLACE_LANE)
2629 #undef VISIT_SIMD_REPLACE_LANE
2630 #undef SIMD_REPLACE_LANE_TYPE_OP
2631
2632 void InstructionSelector::VisitF64x2ReplaceLane(Node* node) {
2633 IA32OperandGenerator g(this);
2634 int32_t lane = OpParameter<int32_t>(node->op());
2635 // When no-AVX, define dst == src to save a move.
2636 InstructionOperand dst =
2637 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
2638 Emit(kIA32F64x2ReplaceLane, dst, g.UseRegister(node->InputAt(0)),
2639 g.UseImmediate(lane), g.UseRegister(node->InputAt(1)));
2640 }
2641
2642 #define VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX(Opcode) \
2643 void InstructionSelector::Visit##Opcode(Node* node) { \
2644 VisitRROSimdShift(this, node, kIA32##Opcode); \
2645 }
2646 SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX(VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX)
2647 #undef VISIT_SIMD_SHIFT_UNIFIED_SSE_AVX
2648 #undef SIMD_SHIFT_OPCODES_UNIFED_SSE_AVX
2649
2650 // TODO(v8:9198): SSE requires operand0 to be a register as we don't have memory
2651 // alignment yet. For AVX, memory operands are fine, but can have performance
2652 // issues if not aligned to 16/32 bytes (based on load size), see SDM Vol 1,
2653 // chapter 14.9
2654 #define VISIT_SIMD_UNOP(Opcode) \
2655 void InstructionSelector::Visit##Opcode(Node* node) { \
2656 IA32OperandGenerator g(this); \
2657 Emit(kIA32##Opcode, g.DefineAsRegister(node), \
2658 g.UseRegister(node->InputAt(0))); \
2659 }
SIMD_UNOP_LIST(VISIT_SIMD_UNOP)2660 SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
2661 #undef VISIT_SIMD_UNOP
2662 #undef SIMD_UNOP_LIST
2663
2664 void InstructionSelector::VisitV128AnyTrue(Node* node) {
2665 IA32OperandGenerator g(this);
2666 InstructionOperand temps[] = {g.TempRegister()};
2667 Emit(kIA32S128AnyTrue, g.DefineAsRegister(node),
2668 g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
2669 }
2670
2671 #define VISIT_SIMD_ALLTRUE(Opcode) \
2672 void InstructionSelector::Visit##Opcode(Node* node) { \
2673 IA32OperandGenerator g(this); \
2674 InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \
2675 Emit(kIA32##Opcode, g.DefineAsRegister(node), \
2676 g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \
2677 }
2678 SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE)
2679 #undef VISIT_SIMD_ALLTRUE
2680 #undef SIMD_ALLTRUE_LIST
2681
2682 #define VISIT_SIMD_BINOP(Opcode) \
2683 void InstructionSelector::Visit##Opcode(Node* node) { \
2684 VisitRROSimd(this, node, kAVX##Opcode, kSSE##Opcode); \
2685 }
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)2686 SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
2687 #undef VISIT_SIMD_BINOP
2688 #undef SIMD_BINOP_LIST
2689
2690 #define VISIT_SIMD_BINOP_UNIFIED_SSE_AVX(Opcode) \
2691 void InstructionSelector::Visit##Opcode(Node* node) { \
2692 VisitRROSimd(this, node, kIA32##Opcode, kIA32##Opcode); \
2693 }
2694 SIMD_BINOP_UNIFIED_SSE_AVX_LIST(VISIT_SIMD_BINOP_UNIFIED_SSE_AVX)
2695 #undef VISIT_SIMD_BINOP_UNIFIED_SSE_AVX
2696 #undef SIMD_BINOP_UNIFIED_SSE_AVX_LIST
2697
2698 #define VISIT_SIMD_BINOP_RRR(OPCODE) \
2699 void InstructionSelector::Visit##OPCODE(Node* node) { \
2700 VisitRRRSimd(this, node, kIA32##OPCODE); \
2701 }
2702 SIMD_BINOP_RRR(VISIT_SIMD_BINOP_RRR)
2703 #undef VISIT_SIMD_BINOP_RRR
2704 #undef SIMD_BINOP_RRR
2705
2706 void InstructionSelector::VisitI16x8BitMask(Node* node) {
2707 IA32OperandGenerator g(this);
2708 InstructionOperand temps[] = {g.TempSimd128Register()};
2709 Emit(kIA32I16x8BitMask, g.DefineAsRegister(node),
2710 g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps);
2711 }
2712
VisitI8x16Shl(Node * node)2713 void InstructionSelector::VisitI8x16Shl(Node* node) {
2714 VisitI8x16Shift(this, node, kIA32I8x16Shl);
2715 }
2716
VisitI8x16ShrS(Node * node)2717 void InstructionSelector::VisitI8x16ShrS(Node* node) {
2718 VisitI8x16Shift(this, node, kIA32I8x16ShrS);
2719 }
2720
VisitI8x16ShrU(Node * node)2721 void InstructionSelector::VisitI8x16ShrU(Node* node) {
2722 VisitI8x16Shift(this, node, kIA32I8x16ShrU);
2723 }
2724
VisitInt32AbsWithOverflow(Node * node)2725 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
2726 UNREACHABLE();
2727 }
2728
VisitInt64AbsWithOverflow(Node * node)2729 void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
2730 UNREACHABLE();
2731 }
2732
2733 #if V8_ENABLE_WEBASSEMBLY
2734 namespace {
2735
2736 // Returns true if shuffle can be decomposed into two 16x4 half shuffles
2737 // followed by a 16x8 blend.
2738 // E.g. [3 2 1 0 15 14 13 12].
TryMatch16x8HalfShuffle(uint8_t * shuffle16x8,uint8_t * blend_mask)2739 bool TryMatch16x8HalfShuffle(uint8_t* shuffle16x8, uint8_t* blend_mask) {
2740 *blend_mask = 0;
2741 for (int i = 0; i < 8; i++) {
2742 if ((shuffle16x8[i] & 0x4) != (i & 0x4)) return false;
2743 *blend_mask |= (shuffle16x8[i] > 7 ? 1 : 0) << i;
2744 }
2745 return true;
2746 }
2747
2748 struct ShuffleEntry {
2749 uint8_t shuffle[kSimd128Size];
2750 ArchOpcode opcode;
2751 ArchOpcode avx_opcode;
2752 bool src0_needs_reg;
2753 bool src1_needs_reg;
2754 };
2755
2756 // Shuffles that map to architecture-specific instruction sequences. These are
2757 // matched very early, so we shouldn't include shuffles that match better in
2758 // later tests, like 32x4 and 16x8 shuffles. In general, these patterns should
2759 // map to either a single instruction, or be finer grained, such as zip/unzip or
2760 // transpose patterns.
2761 static const ShuffleEntry arch_shuffles[] = {
2762 {{0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23},
2763 kIA32S64x2UnpackLow,
2764 kIA32S64x2UnpackLow,
2765 true,
2766 false},
2767 {{8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31},
2768 kIA32S64x2UnpackHigh,
2769 kIA32S64x2UnpackHigh,
2770 true,
2771 false},
2772 {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
2773 kIA32S32x4UnpackLow,
2774 kIA32S32x4UnpackLow,
2775 true,
2776 false},
2777 {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
2778 kIA32S32x4UnpackHigh,
2779 kIA32S32x4UnpackHigh,
2780 true,
2781 false},
2782 {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
2783 kIA32S16x8UnpackLow,
2784 kIA32S16x8UnpackLow,
2785 true,
2786 false},
2787 {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
2788 kIA32S16x8UnpackHigh,
2789 kIA32S16x8UnpackHigh,
2790 true,
2791 false},
2792 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
2793 kIA32S8x16UnpackLow,
2794 kIA32S8x16UnpackLow,
2795 true,
2796 false},
2797 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
2798 kIA32S8x16UnpackHigh,
2799 kIA32S8x16UnpackHigh,
2800 true,
2801 false},
2802
2803 {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
2804 kSSES16x8UnzipLow,
2805 kAVXS16x8UnzipLow,
2806 true,
2807 false},
2808 {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
2809 kSSES16x8UnzipHigh,
2810 kAVXS16x8UnzipHigh,
2811 true,
2812 true},
2813 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
2814 kSSES8x16UnzipLow,
2815 kAVXS8x16UnzipLow,
2816 true,
2817 true},
2818 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
2819 kSSES8x16UnzipHigh,
2820 kAVXS8x16UnzipHigh,
2821 true,
2822 true},
2823
2824 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
2825 kSSES8x16TransposeLow,
2826 kAVXS8x16TransposeLow,
2827 true,
2828 true},
2829 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
2830 kSSES8x16TransposeHigh,
2831 kAVXS8x16TransposeHigh,
2832 true,
2833 true},
2834 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8},
2835 kSSES8x8Reverse,
2836 kAVXS8x8Reverse,
2837 true,
2838 true},
2839 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
2840 kSSES8x4Reverse,
2841 kAVXS8x4Reverse,
2842 true,
2843 true},
2844 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
2845 kSSES8x2Reverse,
2846 kAVXS8x2Reverse,
2847 true,
2848 true}};
2849
TryMatchArchShuffle(const uint8_t * shuffle,const ShuffleEntry * table,size_t num_entries,bool is_swizzle,const ShuffleEntry ** arch_shuffle)2850 bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
2851 size_t num_entries, bool is_swizzle,
2852 const ShuffleEntry** arch_shuffle) {
2853 uint8_t mask = is_swizzle ? kSimd128Size - 1 : 2 * kSimd128Size - 1;
2854 for (size_t i = 0; i < num_entries; ++i) {
2855 const ShuffleEntry& entry = table[i];
2856 int j = 0;
2857 for (; j < kSimd128Size; ++j) {
2858 if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
2859 break;
2860 }
2861 }
2862 if (j == kSimd128Size) {
2863 *arch_shuffle = &entry;
2864 return true;
2865 }
2866 }
2867 return false;
2868 }
2869
2870 } // namespace
2871
VisitI8x16Shuffle(Node * node)2872 void InstructionSelector::VisitI8x16Shuffle(Node* node) {
2873 uint8_t shuffle[kSimd128Size];
2874 bool is_swizzle;
2875 CanonicalizeShuffle(node, shuffle, &is_swizzle);
2876
2877 int imm_count = 0;
2878 static const int kMaxImms = 6;
2879 uint32_t imms[kMaxImms];
2880 int temp_count = 0;
2881 static const int kMaxTemps = 2;
2882 InstructionOperand temps[kMaxTemps];
2883
2884 IA32OperandGenerator g(this);
2885 bool use_avx = CpuFeatures::IsSupported(AVX);
2886 // AVX and swizzles don't generally need DefineSameAsFirst to avoid a move.
2887 bool no_same_as_first = use_avx || is_swizzle;
2888 // We generally need UseRegister for input0, Use for input1.
2889 // TODO(v8:9198): We don't have 16-byte alignment for SIMD operands yet, but
2890 // we retain this logic (continue setting these in the various shuffle match
2891 // clauses), but ignore it when selecting registers or slots.
2892 bool src0_needs_reg = true;
2893 bool src1_needs_reg = false;
2894 ArchOpcode opcode = kIA32I8x16Shuffle; // general shuffle is the default
2895
2896 uint8_t offset;
2897 uint8_t shuffle32x4[4];
2898 uint8_t shuffle16x8[8];
2899 int index;
2900 const ShuffleEntry* arch_shuffle;
2901 if (wasm::SimdShuffle::TryMatchConcat(shuffle, &offset)) {
2902 if (wasm::SimdShuffle::TryMatch32x4Rotate(shuffle, shuffle32x4,
2903 is_swizzle)) {
2904 uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
2905 opcode = kIA32S32x4Rotate;
2906 imms[imm_count++] = shuffle_mask;
2907 } else {
2908 // Swap inputs from the normal order for (v)palignr.
2909 SwapShuffleInputs(node);
2910 is_swizzle = false; // It's simpler to just handle the general case.
2911 no_same_as_first = use_avx; // SSE requires same-as-first.
2912 opcode = kIA32S8x16Alignr;
2913 // palignr takes a single imm8 offset.
2914 imms[imm_count++] = offset;
2915 }
2916 } else if (TryMatchArchShuffle(shuffle, arch_shuffles,
2917 arraysize(arch_shuffles), is_swizzle,
2918 &arch_shuffle)) {
2919 opcode = use_avx ? arch_shuffle->avx_opcode : arch_shuffle->opcode;
2920 src0_needs_reg = !use_avx || arch_shuffle->src0_needs_reg;
2921 // SSE can't take advantage of both operands in registers and needs
2922 // same-as-first.
2923 src1_needs_reg = use_avx && arch_shuffle->src1_needs_reg;
2924 no_same_as_first = use_avx;
2925 } else if (wasm::SimdShuffle::TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
2926 uint8_t shuffle_mask = wasm::SimdShuffle::PackShuffle4(shuffle32x4);
2927 if (is_swizzle) {
2928 if (wasm::SimdShuffle::TryMatchIdentity(shuffle)) {
2929 // Bypass normal shuffle code generation in this case.
2930 EmitIdentity(node);
2931 return;
2932 } else {
2933 // pshufd takes a single imm8 shuffle mask.
2934 opcode = kIA32S32x4Swizzle;
2935 no_same_as_first = true;
2936 // TODO(v8:9198): This doesn't strictly require a register, forcing the
2937 // swizzles to always use registers until generation of incorrect memory
2938 // operands can be fixed.
2939 src0_needs_reg = true;
2940 imms[imm_count++] = shuffle_mask;
2941 }
2942 } else {
2943 // 2 operand shuffle
2944 // A blend is more efficient than a general 32x4 shuffle; try it first.
2945 if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
2946 opcode = kIA32S16x8Blend;
2947 uint8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
2948 imms[imm_count++] = blend_mask;
2949 } else {
2950 opcode = kIA32S32x4Shuffle;
2951 no_same_as_first = true;
2952 // TODO(v8:9198): src0 and src1 is used by pshufd in codegen, which
2953 // requires memory to be 16-byte aligned, since we cannot guarantee that
2954 // yet, force using a register here.
2955 src0_needs_reg = true;
2956 src1_needs_reg = true;
2957 imms[imm_count++] = shuffle_mask;
2958 int8_t blend_mask = wasm::SimdShuffle::PackBlend4(shuffle32x4);
2959 imms[imm_count++] = blend_mask;
2960 }
2961 }
2962 } else if (wasm::SimdShuffle::TryMatch16x8Shuffle(shuffle, shuffle16x8)) {
2963 uint8_t blend_mask;
2964 if (wasm::SimdShuffle::TryMatchBlend(shuffle)) {
2965 opcode = kIA32S16x8Blend;
2966 blend_mask = wasm::SimdShuffle::PackBlend8(shuffle16x8);
2967 imms[imm_count++] = blend_mask;
2968 } else if (wasm::SimdShuffle::TryMatchSplat<8>(shuffle, &index)) {
2969 opcode = kIA32S16x8Dup;
2970 src0_needs_reg = false;
2971 imms[imm_count++] = index;
2972 } else if (TryMatch16x8HalfShuffle(shuffle16x8, &blend_mask)) {
2973 opcode = is_swizzle ? kIA32S16x8HalfShuffle1 : kIA32S16x8HalfShuffle2;
2974 // Half-shuffles don't need DefineSameAsFirst or UseRegister(src0).
2975 no_same_as_first = true;
2976 src0_needs_reg = false;
2977 uint8_t mask_lo = wasm::SimdShuffle::PackShuffle4(shuffle16x8);
2978 uint8_t mask_hi = wasm::SimdShuffle::PackShuffle4(shuffle16x8 + 4);
2979 imms[imm_count++] = mask_lo;
2980 imms[imm_count++] = mask_hi;
2981 if (!is_swizzle) imms[imm_count++] = blend_mask;
2982 }
2983 } else if (wasm::SimdShuffle::TryMatchSplat<16>(shuffle, &index)) {
2984 opcode = kIA32S8x16Dup;
2985 no_same_as_first = use_avx;
2986 src0_needs_reg = true;
2987 imms[imm_count++] = index;
2988 }
2989 if (opcode == kIA32I8x16Shuffle) {
2990 // Use same-as-first for general swizzle, but not shuffle.
2991 no_same_as_first = !is_swizzle;
2992 src0_needs_reg = !no_same_as_first;
2993 imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle);
2994 imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 4);
2995 imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 8);
2996 imms[imm_count++] = wasm::SimdShuffle::Pack4Lanes(shuffle + 12);
2997 temps[temp_count++] = g.TempRegister();
2998 }
2999
3000 // Use DefineAsRegister(node) and Use(src0) if we can without forcing an extra
3001 // move instruction in the CodeGenerator.
3002 Node* input0 = node->InputAt(0);
3003 InstructionOperand dst =
3004 no_same_as_first ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
3005 // TODO(v8:9198): Use src0_needs_reg when we have memory alignment for SIMD.
3006 InstructionOperand src0 = g.UseRegister(input0);
3007 USE(src0_needs_reg);
3008
3009 int input_count = 0;
3010 InstructionOperand inputs[2 + kMaxImms + kMaxTemps];
3011 inputs[input_count++] = src0;
3012 if (!is_swizzle) {
3013 Node* input1 = node->InputAt(1);
3014 // TODO(v8:9198): Use src1_needs_reg when we have memory alignment for SIMD.
3015 inputs[input_count++] = g.UseRegister(input1);
3016 USE(src1_needs_reg);
3017 }
3018 for (int i = 0; i < imm_count; ++i) {
3019 inputs[input_count++] = g.UseImmediate(imms[i]);
3020 }
3021 Emit(opcode, 1, &dst, input_count, inputs, temp_count, temps);
3022 }
3023
VisitI8x16Swizzle(Node * node)3024 void InstructionSelector::VisitI8x16Swizzle(Node* node) {
3025 InstructionCode op = kIA32I8x16Swizzle;
3026
3027 bool relaxed = OpParameter<bool>(node->op());
3028 if (relaxed) {
3029 op |= MiscField::encode(true);
3030 } else {
3031 auto m = V128ConstMatcher(node->InputAt(1));
3032 if (m.HasResolvedValue()) {
3033 // If the indices vector is a const, check if they are in range, or if the
3034 // top bit is set, then we can avoid the paddusb in the codegen and simply
3035 // emit a pshufb.
3036 auto imms = m.ResolvedValue().immediate();
3037 op |= MiscField::encode(wasm::SimdSwizzle::AllInRangeOrTopBitSet(imms));
3038 }
3039 }
3040
3041 IA32OperandGenerator g(this);
3042 InstructionOperand temps[] = {g.TempRegister()};
3043 Emit(op,
3044 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node),
3045 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
3046 arraysize(temps), temps);
3047 }
3048 #else
VisitI8x16Shuffle(Node * node)3049 void InstructionSelector::VisitI8x16Shuffle(Node* node) { UNREACHABLE(); }
VisitI8x16Swizzle(Node * node)3050 void InstructionSelector::VisitI8x16Swizzle(Node* node) { UNREACHABLE(); }
3051 #endif // V8_ENABLE_WEBASSEMBLY
3052
3053 namespace {
VisitMinOrMax(InstructionSelector * selector,Node * node,ArchOpcode opcode,bool flip_inputs)3054 void VisitMinOrMax(InstructionSelector* selector, Node* node, ArchOpcode opcode,
3055 bool flip_inputs) {
3056 // Due to the way minps/minpd work, we want the dst to be same as the second
3057 // input: b = pmin(a, b) directly maps to minps b a.
3058 IA32OperandGenerator g(selector);
3059 InstructionOperand dst = selector->IsSupported(AVX)
3060 ? g.DefineAsRegister(node)
3061 : g.DefineSameAsFirst(node);
3062 if (flip_inputs) {
3063 // Due to the way minps/minpd work, we want the dst to be same as the second
3064 // input: b = pmin(a, b) directly maps to minps b a.
3065 selector->Emit(opcode, dst, g.UseRegister(node->InputAt(1)),
3066 g.UseRegister(node->InputAt(0)));
3067 } else {
3068 selector->Emit(opcode, dst, g.UseRegister(node->InputAt(0)),
3069 g.UseRegister(node->InputAt(1)));
3070 }
3071 }
3072 } // namespace
3073
VisitF32x4Pmin(Node * node)3074 void InstructionSelector::VisitF32x4Pmin(Node* node) {
3075 VisitMinOrMax(this, node, kIA32Minps, true);
3076 }
3077
VisitF32x4Pmax(Node * node)3078 void InstructionSelector::VisitF32x4Pmax(Node* node) {
3079 VisitMinOrMax(this, node, kIA32Maxps, true);
3080 }
3081
VisitF64x2Pmin(Node * node)3082 void InstructionSelector::VisitF64x2Pmin(Node* node) {
3083 VisitMinOrMax(this, node, kIA32Minpd, true);
3084 }
3085
VisitF64x2Pmax(Node * node)3086 void InstructionSelector::VisitF64x2Pmax(Node* node) {
3087 VisitMinOrMax(this, node, kIA32Maxpd, true);
3088 }
3089
VisitF32x4RelaxedMin(Node * node)3090 void InstructionSelector::VisitF32x4RelaxedMin(Node* node) {
3091 VisitMinOrMax(this, node, kIA32Minps, false);
3092 }
3093
VisitF32x4RelaxedMax(Node * node)3094 void InstructionSelector::VisitF32x4RelaxedMax(Node* node) {
3095 VisitMinOrMax(this, node, kIA32Maxps, false);
3096 }
3097
VisitF64x2RelaxedMin(Node * node)3098 void InstructionSelector::VisitF64x2RelaxedMin(Node* node) {
3099 VisitMinOrMax(this, node, kIA32Minpd, false);
3100 }
3101
VisitF64x2RelaxedMax(Node * node)3102 void InstructionSelector::VisitF64x2RelaxedMax(Node* node) {
3103 VisitMinOrMax(this, node, kIA32Maxpd, false);
3104 }
3105
3106 namespace {
VisitExtAddPairwise(InstructionSelector * selector,Node * node,ArchOpcode opcode,bool need_temp)3107 void VisitExtAddPairwise(InstructionSelector* selector, Node* node,
3108 ArchOpcode opcode, bool need_temp) {
3109 IA32OperandGenerator g(selector);
3110 InstructionOperand operand0 = g.UseRegister(node->InputAt(0));
3111 InstructionOperand dst = (selector->IsSupported(AVX))
3112 ? g.DefineAsRegister(node)
3113 : g.DefineSameAsFirst(node);
3114 if (need_temp) {
3115 InstructionOperand temps[] = {g.TempRegister()};
3116 selector->Emit(opcode, dst, operand0, arraysize(temps), temps);
3117 } else {
3118 selector->Emit(opcode, dst, operand0);
3119 }
3120 }
3121 } // namespace
3122
VisitI32x4ExtAddPairwiseI16x8S(Node * node)3123 void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) {
3124 VisitExtAddPairwise(this, node, kIA32I32x4ExtAddPairwiseI16x8S, true);
3125 }
3126
VisitI32x4ExtAddPairwiseI16x8U(Node * node)3127 void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8U(Node* node) {
3128 VisitExtAddPairwise(this, node, kIA32I32x4ExtAddPairwiseI16x8U, false);
3129 }
3130
VisitI16x8ExtAddPairwiseI8x16S(Node * node)3131 void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16S(Node* node) {
3132 VisitExtAddPairwise(this, node, kIA32I16x8ExtAddPairwiseI8x16S, true);
3133 }
3134
VisitI16x8ExtAddPairwiseI8x16U(Node * node)3135 void InstructionSelector::VisitI16x8ExtAddPairwiseI8x16U(Node* node) {
3136 VisitExtAddPairwise(this, node, kIA32I16x8ExtAddPairwiseI8x16U, true);
3137 }
3138
VisitI8x16Popcnt(Node * node)3139 void InstructionSelector::VisitI8x16Popcnt(Node* node) {
3140 IA32OperandGenerator g(this);
3141 InstructionOperand dst = CpuFeatures::IsSupported(AVX)
3142 ? g.DefineAsRegister(node)
3143 : g.DefineAsRegister(node);
3144 InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()};
3145 Emit(kIA32I8x16Popcnt, dst, g.UseUniqueRegister(node->InputAt(0)),
3146 arraysize(temps), temps);
3147 }
3148
VisitF64x2ConvertLowI32x4U(Node * node)3149 void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) {
3150 IA32OperandGenerator g(this);
3151 InstructionOperand temps[] = {g.TempRegister()};
3152 InstructionOperand dst =
3153 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
3154 Emit(kIA32F64x2ConvertLowI32x4U, dst, g.UseRegister(node->InputAt(0)),
3155 arraysize(temps), temps);
3156 }
3157
VisitI32x4TruncSatF64x2SZero(Node * node)3158 void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
3159 IA32OperandGenerator g(this);
3160 InstructionOperand temps[] = {g.TempRegister()};
3161 if (IsSupported(AVX)) {
3162 // Requires dst != src.
3163 Emit(kIA32I32x4TruncSatF64x2SZero, g.DefineAsRegister(node),
3164 g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps);
3165 } else {
3166 Emit(kIA32I32x4TruncSatF64x2SZero, g.DefineSameAsFirst(node),
3167 g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
3168 }
3169 }
3170
VisitI32x4TruncSatF64x2UZero(Node * node)3171 void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
3172 IA32OperandGenerator g(this);
3173 InstructionOperand temps[] = {g.TempRegister()};
3174 InstructionOperand dst =
3175 IsSupported(AVX) ? g.DefineAsRegister(node) : g.DefineSameAsFirst(node);
3176 Emit(kIA32I32x4TruncSatF64x2UZero, dst, g.UseRegister(node->InputAt(0)),
3177 arraysize(temps), temps);
3178 }
3179
VisitI32x4RelaxedTruncF64x2SZero(Node * node)3180 void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
3181 VisitRRSimd(this, node, kIA32Cvttpd2dq);
3182 }
3183
VisitI32x4RelaxedTruncF64x2UZero(Node * node)3184 void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
3185 VisitFloatUnop(this, node, node->InputAt(0), kIA32I32x4TruncF64x2UZero);
3186 }
3187
VisitI32x4RelaxedTruncF32x4S(Node * node)3188 void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
3189 VisitRRSimd(this, node, kIA32Cvttps2dq);
3190 }
3191
VisitI32x4RelaxedTruncF32x4U(Node * node)3192 void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
3193 VisitFloatUnop(this, node, node->InputAt(0), kIA32I32x4TruncF32x4U);
3194 }
3195
VisitI64x2GtS(Node * node)3196 void InstructionSelector::VisitI64x2GtS(Node* node) {
3197 IA32OperandGenerator g(this);
3198 if (CpuFeatures::IsSupported(AVX)) {
3199 Emit(kIA32I64x2GtS, g.DefineAsRegister(node),
3200 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
3201 } else if (CpuFeatures::IsSupported(SSE4_2)) {
3202 Emit(kIA32I64x2GtS, g.DefineSameAsFirst(node),
3203 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
3204 } else {
3205 Emit(kIA32I64x2GtS, g.DefineAsRegister(node),
3206 g.UseUniqueRegister(node->InputAt(0)),
3207 g.UseUniqueRegister(node->InputAt(1)));
3208 }
3209 }
3210
VisitI64x2GeS(Node * node)3211 void InstructionSelector::VisitI64x2GeS(Node* node) {
3212 IA32OperandGenerator g(this);
3213 if (CpuFeatures::IsSupported(AVX)) {
3214 Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
3215 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
3216 } else if (CpuFeatures::IsSupported(SSE4_2)) {
3217 Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
3218 g.UseUniqueRegister(node->InputAt(0)),
3219 g.UseRegister(node->InputAt(1)));
3220 } else {
3221 Emit(kIA32I64x2GeS, g.DefineAsRegister(node),
3222 g.UseUniqueRegister(node->InputAt(0)),
3223 g.UseUniqueRegister(node->InputAt(1)));
3224 }
3225 }
3226
VisitI64x2Abs(Node * node)3227 void InstructionSelector::VisitI64x2Abs(Node* node) {
3228 VisitRRSimd(this, node, kIA32I64x2Abs, kIA32I64x2Abs);
3229 }
3230
VisitF64x2PromoteLowF32x4(Node * node)3231 void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
3232 IA32OperandGenerator g(this);
3233 InstructionCode code = kIA32F64x2PromoteLowF32x4;
3234 Node* input = node->InputAt(0);
3235 LoadTransformMatcher m(input);
3236
3237 if (m.Is(LoadTransformation::kS128Load64Zero) && CanCover(node, input)) {
3238 // Trap handler is not supported on IA32.
3239 DCHECK_NE(m.ResolvedValue().kind, MemoryAccessKind::kProtected);
3240 // LoadTransforms cannot be eliminated, so they are visited even if
3241 // unused. Mark it as defined so that we don't visit it.
3242 MarkAsDefined(input);
3243 VisitLoad(node, input, code);
3244 return;
3245 }
3246
3247 VisitRR(this, node, code);
3248 }
3249
3250 namespace {
3251 // pblendvb is a correct implementation for all the various relaxed lane select,
3252 // see https://github.com/WebAssembly/relaxed-simd/issues/17.
VisitRelaxedLaneSelect(InstructionSelector * selector,Node * node)3253 void VisitRelaxedLaneSelect(InstructionSelector* selector, Node* node) {
3254 IA32OperandGenerator g(selector);
3255 // pblendvb copies src2 when mask is set, opposite from Wasm semantics.
3256 // node's inputs are: mask, lhs, rhs (determined in wasm-compiler.cc).
3257 if (selector->IsSupported(AVX)) {
3258 selector->Emit(kIA32Pblendvb, g.DefineAsRegister(node),
3259 g.UseRegister(node->InputAt(2)),
3260 g.UseRegister(node->InputAt(1)),
3261 g.UseRegister(node->InputAt(0)));
3262 } else {
3263 // SSE4.1 pblendvb requires xmm0 to hold the mask as an implicit operand.
3264 selector->Emit(kIA32Pblendvb, g.DefineSameAsFirst(node),
3265 g.UseRegister(node->InputAt(2)),
3266 g.UseRegister(node->InputAt(1)),
3267 g.UseFixed(node->InputAt(0), xmm0));
3268 }
3269 }
3270 } // namespace
3271
VisitI8x16RelaxedLaneSelect(Node * node)3272 void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
3273 VisitRelaxedLaneSelect(this, node);
3274 }
VisitI16x8RelaxedLaneSelect(Node * node)3275 void InstructionSelector::VisitI16x8RelaxedLaneSelect(Node* node) {
3276 VisitRelaxedLaneSelect(this, node);
3277 }
VisitI32x4RelaxedLaneSelect(Node * node)3278 void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
3279 VisitRelaxedLaneSelect(this, node);
3280 }
VisitI64x2RelaxedLaneSelect(Node * node)3281 void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
3282 VisitRelaxedLaneSelect(this, node);
3283 }
3284
VisitF64x2Qfma(Node * node)3285 void InstructionSelector::VisitF64x2Qfma(Node* node) {
3286 VisitRRRR(this, node, kIA32F64x2Qfma);
3287 }
3288
VisitF64x2Qfms(Node * node)3289 void InstructionSelector::VisitF64x2Qfms(Node* node) {
3290 VisitRRRR(this, node, kIA32F64x2Qfms);
3291 }
3292
VisitF32x4Qfma(Node * node)3293 void InstructionSelector::VisitF32x4Qfma(Node* node) {
3294 VisitRRRR(this, node, kIA32F32x4Qfma);
3295 }
3296
VisitF32x4Qfms(Node * node)3297 void InstructionSelector::VisitF32x4Qfms(Node* node) {
3298 VisitRRRR(this, node, kIA32F32x4Qfms);
3299 }
3300
AddOutputToSelectContinuation(OperandGenerator * g,int first_input_index,Node * node)3301 void InstructionSelector::AddOutputToSelectContinuation(OperandGenerator* g,
3302 int first_input_index,
3303 Node* node) {
3304 UNREACHABLE();
3305 }
3306
3307 // static
3308 MachineOperatorBuilder::Flags
SupportedMachineOperatorFlags()3309 InstructionSelector::SupportedMachineOperatorFlags() {
3310 MachineOperatorBuilder::Flags flags =
3311 MachineOperatorBuilder::kWord32ShiftIsSafe |
3312 MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord32Rol;
3313 if (CpuFeatures::IsSupported(POPCNT)) {
3314 flags |= MachineOperatorBuilder::kWord32Popcnt;
3315 }
3316 if (CpuFeatures::IsSupported(SSE4_1)) {
3317 flags |= MachineOperatorBuilder::kFloat32RoundDown |
3318 MachineOperatorBuilder::kFloat64RoundDown |
3319 MachineOperatorBuilder::kFloat32RoundUp |
3320 MachineOperatorBuilder::kFloat64RoundUp |
3321 MachineOperatorBuilder::kFloat32RoundTruncate |
3322 MachineOperatorBuilder::kFloat64RoundTruncate |
3323 MachineOperatorBuilder::kFloat32RoundTiesEven |
3324 MachineOperatorBuilder::kFloat64RoundTiesEven;
3325 }
3326 return flags;
3327 }
3328
3329 // static
3330 MachineOperatorBuilder::AlignmentRequirements
AlignmentRequirements()3331 InstructionSelector::AlignmentRequirements() {
3332 return MachineOperatorBuilder::AlignmentRequirements::
3333 FullUnalignedAccessSupport();
3334 }
3335
3336 } // namespace compiler
3337 } // namespace internal
3338 } // namespace v8
3339