• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/compiler/simd-scalar-lowering.h"
6 
7 #include "src/codegen/machine-type.h"
8 #include "src/common/globals.h"
9 #include "src/compiler/diamond.h"
10 #include "src/compiler/linkage.h"
11 #include "src/compiler/machine-operator.h"
12 #include "src/compiler/node-matchers.h"
13 #include "src/compiler/node-properties.h"
14 #include "src/compiler/node.h"
15 #include "src/compiler/wasm-compiler.h"
16 
17 namespace v8 {
18 namespace internal {
19 namespace compiler {
20 
21 namespace {
// Number of lanes a 128-bit SIMD value is split into, by lane width in bits.
constexpr int kNumLanes64 = 2;
constexpr int kNumLanes32 = 4;
constexpr int kNumLanes16 = 8;
constexpr int kNumLanes8 = 16;
// Masks that keep only the low 16 / low 8 bits of a 32-bit value.
constexpr int32_t kMask16 = 0xFFFF;
constexpr int32_t kMask8 = 0xFF;
// Distance from the top of a 16-bit / 8-bit lane to bit 32 (32 - lane width).
// NOTE(review): presumably the shift amounts handed to FixUpperBits() -
// confirm against the callers.
constexpr int32_t kShift16 = 16;
constexpr int32_t kShift8 = 24;
// Shift-count masks (lane width - 1) returned by GetMaskForShift(); they
// reduce a shift count modulo the lane width.
constexpr int32_t kShiftMask8 = 0x7;
constexpr int32_t kShiftMask16 = 0xF;
constexpr int32_t kShiftMask32 = 0x1F;
constexpr int32_t kShiftMask64 = 0x3F;
34 
35 // Shift values are taken modulo lane size. This helper calculates the mask
36 // required for different shift opcodes.
GetMaskForShift(Node * node)37 int GetMaskForShift(Node* node) {
38   switch (node->opcode()) {
39     case IrOpcode::kI8x16Shl:
40     case IrOpcode::kI8x16ShrS:
41     case IrOpcode::kI8x16ShrU:
42       return kShiftMask8;
43     case IrOpcode::kI16x8Shl:
44     case IrOpcode::kI16x8ShrS:
45     case IrOpcode::kI16x8ShrU:
46       return kShiftMask16;
47     case IrOpcode::kI32x4Shl:
48     case IrOpcode::kI32x4ShrS:
49     case IrOpcode::kI32x4ShrU:
50       return kShiftMask32;
51     case IrOpcode::kI64x2Shl:
52     case IrOpcode::kI64x2ShrS:
53     case IrOpcode::kI64x2ShrU:
54       return kShiftMask64;
55     default:
56       UNIMPLEMENTED();
57   }
58 }
59 }  // anonymous namespace
60 
SimdScalarLowering(MachineGraph * mcgraph,Signature<MachineRepresentation> * signature)61 SimdScalarLowering::SimdScalarLowering(
62     MachineGraph* mcgraph, Signature<MachineRepresentation>* signature)
63     : mcgraph_(mcgraph),
64       state_(mcgraph->graph(), 3),
65       stack_(mcgraph_->zone()),
66       replacements_(nullptr),
67       signature_(signature),
68       placeholder_(graph()->NewNode(common()->Parameter(-2, "placeholder"),
69                                     graph()->start())),
70       parameter_count_after_lowering_(-1) {
71   DCHECK_NOT_NULL(graph());
72   DCHECK_NOT_NULL(graph()->end());
73   replacements_ = zone()->NewArray<Replacement>(graph()->NodeCount());
74   memset(static_cast<void*>(replacements_), 0,
75          sizeof(Replacement) * graph()->NodeCount());
76 }
77 
LowerGraph()78 void SimdScalarLowering::LowerGraph() {
79   stack_.push_back({graph()->end(), 0});
80   state_.Set(graph()->end(), State::kOnStack);
81   replacements_[graph()->end()->id()].type = SimdType::kInt32x4;
82 
83   while (!stack_.empty()) {
84     NodeState& top = stack_.back();
85     if (top.input_index == top.node->InputCount()) {
86       // All inputs of top have already been lowered, now lower top.
87       stack_.pop_back();
88       state_.Set(top.node, State::kVisited);
89       LowerNode(top.node);
90     } else {
91       // Push the next input onto the stack.
92       Node* input = top.node->InputAt(top.input_index++);
93       if (state_.Get(input) == State::kUnvisited) {
94         SetLoweredType(input, top.node);
95         if (input->opcode() == IrOpcode::kPhi) {
96           // To break cycles with phi nodes we push phis on a separate stack so
97           // that they are processed after all other nodes.
98           PreparePhiReplacement(input);
99           stack_.push_front({input, 0});
100         } else if (input->opcode() == IrOpcode::kEffectPhi ||
101                    input->opcode() == IrOpcode::kLoop) {
102           stack_.push_front({input, 0});
103         } else {
104           stack_.push_back({input, 0});
105         }
106         state_.Set(input, State::kOnStack);
107       }
108     }
109   }
110 }
111 
// X-macro list: V(name) is applied to every opcode that SetLoweredType() tags
// with SimdType::kInt64x2.
112 #define FOREACH_INT64X2_OPCODE(V) \
113   V(I64x2Splat)                   \
114   V(I64x2ExtractLane)             \
115   V(I64x2ReplaceLane)             \
116   V(I64x2Neg)                     \
117   V(I64x2Shl)                     \
118   V(I64x2ShrS)                    \
119   V(I64x2ShrU)                    \
120   V(I64x2Add)                     \
121   V(I64x2Sub)                     \
122   V(I64x2Mul)
123 
// X-macro list: V(name) is applied to every opcode that SetLoweredType() tags
// with SimdType::kInt32x4. Besides the I32x4 operations this list also carries
// the S128 bitwise/select operations, the V*AnyTrue / V*AllTrue operations and
// I32x4BitMask.
124 #define FOREACH_INT32X4_OPCODE(V) \
125   V(I32x4Splat)                   \
126   V(I32x4ExtractLane)             \
127   V(I32x4ReplaceLane)             \
128   V(I32x4SConvertF32x4)           \
129   V(I32x4UConvertF32x4)           \
130   V(I32x4SConvertI16x8Low)        \
131   V(I32x4SConvertI16x8High)       \
132   V(I32x4Neg)                     \
133   V(I32x4Shl)                     \
134   V(I32x4ShrS)                    \
135   V(I32x4Add)                     \
136   V(I32x4AddHoriz)                \
137   V(I32x4Sub)                     \
138   V(I32x4Mul)                     \
139   V(I32x4MinS)                    \
140   V(I32x4MaxS)                    \
141   V(I32x4ShrU)                    \
142   V(I32x4MinU)                    \
143   V(I32x4MaxU)                    \
144   V(I32x4DotI16x8S)               \
145   V(I32x4Eq)                      \
146   V(I32x4Ne)                      \
147   V(I32x4LtS)                     \
148   V(I32x4LtU)                     \
149   V(I32x4GtS)                     \
150   V(I32x4GtU)                     \
151   V(I32x4LeS)                     \
152   V(I32x4LeU)                     \
153   V(I32x4GeS)                     \
154   V(I32x4GeU)                     \
155   V(I32x4UConvertI16x8Low)        \
156   V(I32x4UConvertI16x8High)       \
157   V(I32x4Abs)                     \
158   V(S128And)                      \
159   V(S128Or)                       \
160   V(S128Xor)                      \
161   V(S128Not)                      \
162   V(S128AndNot)                   \
163   V(S128Select)                   \
164   V(V32x4AnyTrue)                 \
165   V(V32x4AllTrue)                 \
166   V(V16x8AnyTrue)                 \
167   V(V16x8AllTrue)                 \
168   V(V8x16AnyTrue)                 \
169   V(V8x16AllTrue)                 \
170   V(I32x4BitMask)
171 
// X-macro list: V(name) is applied to every opcode that SetLoweredType() tags
// with SimdType::kFloat64x2.
172 #define FOREACH_FLOAT64X2_OPCODE(V) \
173   V(F64x2Splat)                     \
174   V(F64x2ExtractLane)               \
175   V(F64x2ReplaceLane)               \
176   V(F64x2Abs)                       \
177   V(F64x2Neg)                       \
178   V(F64x2Sqrt)                      \
179   V(F64x2Add)                       \
180   V(F64x2Sub)                       \
181   V(F64x2Mul)                       \
182   V(F64x2Div)                       \
183   V(F64x2Min)                       \
184   V(F64x2Max)                       \
185   V(F64x2Pmin)                      \
186   V(F64x2Pmax)                      \
187   V(F64x2Ceil)                      \
188   V(F64x2Floor)                     \
189   V(F64x2Trunc)                     \
190   V(F64x2NearestInt)
191 
// X-macro list: V(name) is applied to every opcode that SetLoweredType() tags
// with SimdType::kFloat32x4.
192 #define FOREACH_FLOAT32X4_OPCODE(V) \
193   V(F32x4Splat)                     \
194   V(F32x4ExtractLane)               \
195   V(F32x4ReplaceLane)               \
196   V(F32x4SConvertI32x4)             \
197   V(F32x4UConvertI32x4)             \
198   V(F32x4Abs)                       \
199   V(F32x4Neg)                       \
200   V(F32x4Sqrt)                      \
201   V(F32x4RecipApprox)               \
202   V(F32x4RecipSqrtApprox)           \
203   V(F32x4Add)                       \
204   V(F32x4AddHoriz)                  \
205   V(F32x4Sub)                       \
206   V(F32x4Mul)                       \
207   V(F32x4Div)                       \
208   V(F32x4Min)                       \
209   V(F32x4Max)                       \
210   V(F32x4Pmin)                      \
211   V(F32x4Pmax)                      \
212   V(F32x4Ceil)                      \
213   V(F32x4Floor)                     \
214   V(F32x4Trunc)                     \
215   V(F32x4NearestInt)
216 
// X-macro list of the F64x2 comparison opcodes. SetLoweredType() tags these
// nodes themselves with SimdType::kInt64x2.
217 #define FOREACH_FLOAT64x2_TO_INT64x2OPCODE(V) \
218   V(F64x2Eq)                                  \
219   V(F64x2Ne)                                  \
220   V(F64x2Lt)                                  \
221   V(F64x2Le)
222 
// X-macro list of the F32x4 comparison opcodes. SetLoweredType() tags these
// nodes themselves with SimdType::kInt32x4; an input of such a node whose own
// opcode has no dedicated case is tagged with SimdType::kFloat32x4.
223 #define FOREACH_FLOAT32X4_TO_INT32X4OPCODE(V) \
224   V(F32x4Eq)                                  \
225   V(F32x4Ne)                                  \
226   V(F32x4Lt)                                  \
227   V(F32x4Le)                                  \
228   V(F32x4Gt)                                  \
229   V(F32x4Ge)
230 
// X-macro list: V(name) is applied to every opcode that SetLoweredType() tags
// with SimdType::kInt16x8.
231 #define FOREACH_INT16X8_OPCODE(V) \
232   V(I16x8Splat)                   \
233   V(I16x8ExtractLaneU)            \
234   V(I16x8ExtractLaneS)            \
235   V(I16x8ReplaceLane)             \
236   V(I16x8SConvertI8x16Low)        \
237   V(I16x8SConvertI8x16High)       \
238   V(I16x8Neg)                     \
239   V(I16x8Shl)                     \
240   V(I16x8ShrS)                    \
241   V(I16x8SConvertI32x4)           \
242   V(I16x8Add)                     \
243   V(I16x8AddSatS)                 \
244   V(I16x8AddHoriz)                \
245   V(I16x8Sub)                     \
246   V(I16x8SubSatS)                 \
247   V(I16x8Mul)                     \
248   V(I16x8MinS)                    \
249   V(I16x8MaxS)                    \
250   V(I16x8UConvertI8x16Low)        \
251   V(I16x8UConvertI8x16High)       \
252   V(I16x8ShrU)                    \
253   V(I16x8UConvertI32x4)           \
254   V(I16x8AddSatU)                 \
255   V(I16x8SubSatU)                 \
256   V(I16x8MinU)                    \
257   V(I16x8MaxU)                    \
258   V(I16x8Eq)                      \
259   V(I16x8Ne)                      \
260   V(I16x8LtS)                     \
261   V(I16x8LtU)                     \
262   V(I16x8GtS)                     \
263   V(I16x8GtU)                     \
264   V(I16x8LeS)                     \
265   V(I16x8LeU)                     \
266   V(I16x8GeS)                     \
267   V(I16x8GeU)                     \
268   V(I16x8RoundingAverageU)        \
269   V(I16x8Abs)                     \
270   V(I16x8BitMask)
271 
// X-macro list: V(name) is applied to every opcode that SetLoweredType() tags
// with SimdType::kInt8x16.
272 #define FOREACH_INT8X16_OPCODE(V) \
273   V(I8x16Splat)                   \
274   V(I8x16ExtractLaneU)            \
275   V(I8x16ExtractLaneS)            \
276   V(I8x16ReplaceLane)             \
277   V(I8x16SConvertI16x8)           \
278   V(I8x16Neg)                     \
279   V(I8x16Shl)                     \
280   V(I8x16ShrS)                    \
281   V(I8x16Add)                     \
282   V(I8x16AddSatS)                 \
283   V(I8x16Sub)                     \
284   V(I8x16SubSatS)                 \
285   V(I8x16Mul)                     \
286   V(I8x16MinS)                    \
287   V(I8x16MaxS)                    \
288   V(I8x16ShrU)                    \
289   V(I8x16UConvertI16x8)           \
290   V(I8x16AddSatU)                 \
291   V(I8x16SubSatU)                 \
292   V(I8x16MinU)                    \
293   V(I8x16MaxU)                    \
294   V(I8x16Eq)                      \
295   V(I8x16Ne)                      \
296   V(I8x16LtS)                     \
297   V(I8x16LtU)                     \
298   V(I8x16GtS)                     \
299   V(I8x16GtU)                     \
300   V(I8x16LeS)                     \
301   V(I8x16LeU)                     \
302   V(I8x16GeS)                     \
303   V(I8x16GeU)                     \
304   V(I8x16Swizzle)                 \
305   V(I8x16Shuffle)                 \
306   V(I8x16RoundingAverageU)        \
307   V(I8x16Abs)                     \
308   V(I8x16BitMask)
309 
MachineTypeFrom(SimdType simdType)310 MachineType SimdScalarLowering::MachineTypeFrom(SimdType simdType) {
311   switch (simdType) {
312     case SimdType::kFloat64x2:
313       return MachineType::Float64();
314     case SimdType::kFloat32x4:
315       return MachineType::Float32();
316     case SimdType::kInt64x2:
317       return MachineType::Int64();
318     case SimdType::kInt32x4:
319       return MachineType::Int32();
320     case SimdType::kInt16x8:
321       return MachineType::Int16();
322     case SimdType::kInt8x16:
323       return MachineType::Int8();
324   }
325   return MachineType::None();
326 }
327 
SetLoweredType(Node * node,Node * output)328 void SimdScalarLowering::SetLoweredType(Node* node, Node* output) {
329   switch (node->opcode()) {
330 #define CASE_STMT(name) case IrOpcode::k##name:
331     FOREACH_FLOAT64X2_OPCODE(CASE_STMT) {
332       replacements_[node->id()].type = SimdType::kFloat64x2;
333       break;
334     }
335     FOREACH_INT64X2_OPCODE(CASE_STMT) {
336       replacements_[node->id()].type = SimdType::kInt64x2;
337       break;
338     }
339     FOREACH_INT32X4_OPCODE(CASE_STMT)
340     case IrOpcode::kReturn:
341     case IrOpcode::kParameter:
342     case IrOpcode::kPhi:
343     case IrOpcode::kCall: {
344       replacements_[node->id()].type = SimdType::kInt32x4;
345       break;
346     }
347       FOREACH_FLOAT32X4_OPCODE(CASE_STMT) {
348         replacements_[node->id()].type = SimdType::kFloat32x4;
349         break;
350       }
351       FOREACH_FLOAT32X4_TO_INT32X4OPCODE(CASE_STMT) {
352         replacements_[node->id()].type = SimdType::kInt32x4;
353         break;
354       }
355       FOREACH_FLOAT64x2_TO_INT64x2OPCODE(CASE_STMT) {
356         replacements_[node->id()].type = SimdType::kInt64x2;
357         break;
358       }
359       FOREACH_INT16X8_OPCODE(CASE_STMT) {
360         replacements_[node->id()].type = SimdType::kInt16x8;
361         break;
362       }
363       FOREACH_INT8X16_OPCODE(CASE_STMT) {
364         replacements_[node->id()].type = SimdType::kInt8x16;
365         break;
366       }
367     case IrOpcode::kLoadTransform: {
368       LoadTransformParameters params = LoadTransformParametersOf(node->op());
369       switch (params.transformation) {
370         case LoadTransformation::kS128Load8Splat:
371           replacements_[node->id()].type = SimdType::kInt8x16;
372           break;
373         case LoadTransformation::kS128Load16Splat:
374         case LoadTransformation::kS128Load8x8S:
375         case LoadTransformation::kS128Load8x8U:
376           replacements_[node->id()].type = SimdType::kInt16x8;
377           break;
378         case LoadTransformation::kS128Load32Splat:
379         case LoadTransformation::kS128Load16x4S:
380         case LoadTransformation::kS128Load16x4U:
381         case LoadTransformation::kS128Load32Zero:
382           replacements_[node->id()].type = SimdType::kInt32x4;
383           break;
384         case LoadTransformation::kS128Load64Splat:
385         case LoadTransformation::kS128Load32x2S:
386         case LoadTransformation::kS128Load32x2U:
387         case LoadTransformation::kS128Load64Zero:
388           replacements_[node->id()].type = SimdType::kInt64x2;
389           break;
390         default:
391           UNIMPLEMENTED();
392       }
393       break;
394     }
395     default: {
396       switch (output->opcode()) {
397         case IrOpcode::kF32x4SConvertI32x4:
398         case IrOpcode::kF32x4UConvertI32x4:
399         case IrOpcode::kI16x8SConvertI32x4:
400         case IrOpcode::kI16x8UConvertI32x4: {
401           replacements_[node->id()].type = SimdType::kInt32x4;
402           break;
403         }
404         case IrOpcode::kI8x16SConvertI16x8:
405         case IrOpcode::kI8x16UConvertI16x8:
406         case IrOpcode::kI32x4SConvertI16x8Low:
407         case IrOpcode::kI32x4SConvertI16x8High:
408         case IrOpcode::kI32x4UConvertI16x8Low:
409         case IrOpcode::kI32x4UConvertI16x8High: {
410           replacements_[node->id()].type = SimdType::kInt16x8;
411           break;
412         }
413         case IrOpcode::kI16x8SConvertI8x16Low:
414         case IrOpcode::kI16x8SConvertI8x16High:
415         case IrOpcode::kI16x8UConvertI8x16Low:
416         case IrOpcode::kI16x8UConvertI8x16High: {
417           replacements_[node->id()].type = SimdType::kInt8x16;
418           break;
419         }
420           FOREACH_FLOAT32X4_TO_INT32X4OPCODE(CASE_STMT)
421         case IrOpcode::kI32x4SConvertF32x4:
422         case IrOpcode::kI32x4UConvertF32x4: {
423           replacements_[node->id()].type = SimdType::kFloat32x4;
424           break;
425         }
426         case IrOpcode::kS128Select: {
427           replacements_[node->id()].type = SimdType::kInt32x4;
428           break;
429         }
430         default: {
431           replacements_[node->id()].type = replacements_[output->id()].type;
432         }
433       }
434     }
435 #undef CASE_STMT
436   }
437 }
438 
GetParameterIndexAfterLoweringSimd128(Signature<MachineRepresentation> * signature,int old_index)439 static int GetParameterIndexAfterLoweringSimd128(
440     Signature<MachineRepresentation>* signature, int old_index) {
441   // In function calls, the simd128 types are passed as 4 Int32 types. The
442   // parameters are typecast to the types as needed for various operations.
443   int result = old_index;
444   for (int i = 0; i < old_index; ++i) {
445     if (signature->GetParam(i) == MachineRepresentation::kSimd128) {
446       result += 3;
447     }
448   }
449   return result;
450 }
451 
GetParameterCountAfterLowering()452 int SimdScalarLowering::GetParameterCountAfterLowering() {
453   if (parameter_count_after_lowering_ == -1) {
454     // GetParameterIndexAfterLoweringSimd128(parameter_count) returns the
455     // parameter count after lowering.
456     parameter_count_after_lowering_ = GetParameterIndexAfterLoweringSimd128(
457         signature(), static_cast<int>(signature()->parameter_count()));
458   }
459   return parameter_count_after_lowering_;
460 }
461 
GetReturnCountAfterLoweringSimd128(Signature<MachineRepresentation> * signature)462 static int GetReturnCountAfterLoweringSimd128(
463     Signature<MachineRepresentation>* signature) {
464   int result = static_cast<int>(signature->return_count());
465   for (int i = 0; i < static_cast<int>(signature->return_count()); ++i) {
466     if (signature->GetReturn(i) == MachineRepresentation::kSimd128) {
467       result += 3;
468     }
469   }
470   return result;
471 }
472 
GetReturnIndexAfterLowering(const CallDescriptor * call_descriptor,int old_index)473 int GetReturnIndexAfterLowering(const CallDescriptor* call_descriptor,
474                                 int old_index) {
475   int result = old_index;
476   for (int i = 0; i < old_index; ++i) {
477     if (call_descriptor->GetReturnType(i).representation() ==
478         MachineRepresentation::kSimd128) {
479       result += kNumLanes32 - 1;
480     }
481   }
482   return result;
483 }
484 
GetReturnCountAfterLoweringSimd128(const CallDescriptor * call_descriptor)485 static int GetReturnCountAfterLoweringSimd128(
486     const CallDescriptor* call_descriptor) {
487   return GetReturnIndexAfterLowering(
488       call_descriptor, static_cast<int>(call_descriptor->ReturnCount()));
489 }
490 
NumLanes(SimdType type)491 int SimdScalarLowering::NumLanes(SimdType type) {
492   int num_lanes = 0;
493   if (type == SimdType::kFloat64x2 || type == SimdType::kInt64x2) {
494     num_lanes = kNumLanes64;
495   } else if (type == SimdType::kFloat32x4 || type == SimdType::kInt32x4) {
496     num_lanes = kNumLanes32;
497   } else if (type == SimdType::kInt16x8) {
498     num_lanes = kNumLanes16;
499   } else if (type == SimdType::kInt8x16) {
500     num_lanes = kNumLanes8;
501   } else {
502     UNREACHABLE();
503   }
504   return num_lanes;
505 }
506 
// Namespace-scope definition of the kLaneOffsets array. Its declaration and
// initializer are outside this file's visible part - presumably a static
// constexpr member in the class header; confirm there. GetIndexNodes() indexes
// this table with multiples of the lane width.
507 constexpr int SimdScalarLowering::kLaneOffsets[];
508 
GetIndexNodes(Node * index,Node ** new_indices,SimdType type)509 void SimdScalarLowering::GetIndexNodes(Node* index, Node** new_indices,
510                                        SimdType type) {
511   int num_lanes = NumLanes(type);
512   int lane_width = kSimd128Size / num_lanes;
513   int laneIndex = kLaneOffsets[0] / lane_width;
514 
515   Node* rep = index;
516 
517   if (HasReplacement(0, index)) {
518     // Index nodes are lowered to scalar nodes.
519     DCHECK_EQ(1, ReplacementCount(index));
520     rep = GetReplacements(index)[0];
521   }
522 
523   new_indices[laneIndex] = rep;
524   for (int i = 1; i < num_lanes; ++i) {
525     laneIndex = kLaneOffsets[i * lane_width] / lane_width;
526     new_indices[laneIndex] = graph()->NewNode(
527         machine()->Int32Add(), rep,
528         graph()->NewNode(
529             common()->Int32Constant(static_cast<int>(i) * lane_width)));
530   }
531 }
532 
LowerLoadOp(Node * node,SimdType type)533 void SimdScalarLowering::LowerLoadOp(Node* node, SimdType type) {
534   MachineRepresentation rep = LoadRepresentationOf(node->op()).representation();
535   const Operator* load_op;
536   switch (node->opcode()) {
537     case IrOpcode::kLoad:
538       load_op = machine()->Load(MachineTypeFrom(type));
539       break;
540     case IrOpcode::kUnalignedLoad:
541       load_op = machine()->UnalignedLoad(MachineTypeFrom(type));
542       break;
543     case IrOpcode::kProtectedLoad:
544       load_op = machine()->ProtectedLoad(MachineTypeFrom(type));
545       break;
546     default:
547       UNREACHABLE();
548   }
549   if (rep == MachineRepresentation::kSimd128) {
550     Node* base = node->InputAt(0);
551     Node* index = node->InputAt(1);
552     int num_lanes = NumLanes(type);
553     Node** indices = zone()->NewArray<Node*>(num_lanes);
554     GetIndexNodes(index, indices, type);
555     Node** rep_nodes = zone()->NewArray<Node*>(num_lanes);
556     rep_nodes[0] = node;
557     rep_nodes[0]->ReplaceInput(1, indices[0]);
558     NodeProperties::ChangeOp(rep_nodes[0], load_op);
559     if (node->InputCount() > 2) {
560       DCHECK_LT(3, node->InputCount());
561       Node* effect_input = node->InputAt(2);
562       Node* control_input = node->InputAt(3);
563       for (int i = num_lanes - 1; i > 0; --i) {
564         rep_nodes[i] = graph()->NewNode(load_op, base, indices[i], effect_input,
565                                         control_input);
566         effect_input = rep_nodes[i];
567       }
568       rep_nodes[0]->ReplaceInput(2, rep_nodes[1]);
569     } else {
570       for (int i = 1; i < num_lanes; ++i) {
571         rep_nodes[i] = graph()->NewNode(load_op, base, indices[i]);
572       }
573     }
574     ReplaceNode(node, rep_nodes, num_lanes);
575   } else {
576     DefaultLowering(node);
577   }
578 }
579 
LowerLoadTransformOp(Node * node,SimdType type)580 void SimdScalarLowering::LowerLoadTransformOp(Node* node, SimdType type) {
581   LoadTransformParameters params = LoadTransformParametersOf(node->op());
582   MachineType load_rep = MachineType::None();
583   SimdType load_type = type;
584 
585   // Load extends have a different machine type for loading.
586   switch (params.transformation) {
587     case LoadTransformation::kS128Load8x8S:
588       load_rep = MachineType::Int8();
589       load_type = SimdType::kInt8x16;
590       break;
591     case LoadTransformation::kS128Load8x8U:
592       load_rep = MachineType::Uint8();
593       load_type = SimdType::kInt8x16;
594       break;
595     case LoadTransformation::kS128Load16x4S:
596       load_rep = MachineType::Int16();
597       load_type = SimdType::kInt16x8;
598       break;
599     case LoadTransformation::kS128Load16x4U:
600       load_rep = MachineType::Uint16();
601       load_type = SimdType::kInt16x8;
602       break;
603     case LoadTransformation::kS128Load32x2S:
604       load_rep = MachineType::Int32();
605       load_type = SimdType::kInt32x4;
606       break;
607     case LoadTransformation::kS128Load32x2U:
608       load_rep = MachineType::Uint32();
609       load_type = SimdType::kInt32x4;
610       break;
611     case LoadTransformation::kS128Load8Splat:
612     case LoadTransformation::kS128Load16Splat:
613     case LoadTransformation::kS128Load32Splat:
614     case LoadTransformation::kS128Load64Splat:
615     case LoadTransformation::kS128Load32Zero:
616     case LoadTransformation::kS128Load64Zero:
617       load_rep = MachineTypeFrom(type);
618       break;
619     default:
620       UNREACHABLE();
621   }
622 
623   DCHECK_NE(load_rep, MachineType::None());
624 
625   const Operator* load_op;
626   switch (params.kind) {
627     case MemoryAccessKind::kNormal:
628       load_op = machine()->Load(load_rep);
629       break;
630     case MemoryAccessKind::kUnaligned:
631       load_op = machine()->UnalignedLoad(load_rep);
632       break;
633     case MemoryAccessKind::kProtected:
634       load_op = machine()->ProtectedLoad(load_rep);
635       break;
636   }
637 
638   Node* base = node->InputAt(0);
639   Node* index = node->InputAt(1);
640   int num_lanes = NumLanes(type);
641   Node** reps = zone()->NewArray<Node*>(num_lanes);
642   Node* effect_input = node->InputAt(2);
643   Node* control_input = node->InputAt(3);
644 
645   // This node is also used as effect input into other nodes, so we need to
646   // change this node in place.
647   reps[0] = node;
648   NodeProperties::ChangeOp(reps[0], load_op);
649 
650   if (type != load_type) {
651     // We load a smaller lane size, then extend to a larger lane size. So use
652     // the smaller lane size to calculte the index nodes for loads, but only
653     // actually load half of those lanes.
654     Node** indices = zone()->NewArray<Node*>(num_lanes * 2);
655     GetIndexNodes(index, indices, load_type);
656 
657     reps[0]->ReplaceInput(1, indices[0]);
658 
659     for (int i = num_lanes - 1; i > 0; --i) {
660       reps[i] = graph()->NewNode(load_op, base, indices[i], effect_input,
661                                  control_input);
662       effect_input = reps[i];
663     }
664   } else {
665     if (params.transformation == LoadTransformation::kS128Load32Zero) {
666       for (int i = num_lanes - 1; i > 0; --i) {
667         reps[i] = mcgraph_->Int32Constant(0);
668       }
669     } else if (params.transformation == LoadTransformation::kS128Load64Zero) {
670       for (int i = num_lanes - 1; i > 0; --i) {
671         reps[i] = mcgraph_->Int64Constant(0);
672       }
673     } else {
674       // Load splat, load from the same index for every lane.
675       Node* rep = HasReplacement(0, index) ? GetReplacements(index)[0] : index;
676 
677       // Replace first node, we only called ChangeOp above.
678       reps[0]->ReplaceInput(1, rep);
679       for (int i = num_lanes - 1; i > 0; --i) {
680         reps[i] =
681             graph()->NewNode(load_op, base, rep, effect_input, control_input);
682         effect_input = reps[i];
683       }
684     }
685   }
686 
687   // Update the effect input, completing the effect chain, but only if there is
688   // an effect output (LoadZero does not have an effect output, it is zero).
689   if (reps[1]->op()->EffectOutputCount() > 0) {
690     reps[0]->ReplaceInput(2, reps[1]);
691   }
692 
693   // Special case, the load nodes need to be sign extended, and we do it here so
694   // the loop above can connect all the effect edges correctly.
695   if (params.transformation == LoadTransformation::kS128Load32x2S) {
696     for (int i = 0; i < num_lanes; ++i) {
697       reps[i] = graph()->NewNode(machine()->ChangeInt32ToInt64(), reps[i]);
698     }
699   } else if (params.transformation == LoadTransformation::kS128Load32x2U) {
700     for (int i = 0; i < num_lanes; ++i) {
701       reps[i] = graph()->NewNode(machine()->ChangeUint32ToUint64(), reps[i]);
702     }
703   }
704 
705   ReplaceNode(node, reps, num_lanes);
706 }
707 
LowerStoreOp(Node * node)708 void SimdScalarLowering::LowerStoreOp(Node* node) {
709   // For store operation, use replacement type of its input instead of the
710   // one of its effected node.
711   DCHECK_LT(2, node->InputCount());
712   SimdType rep_type = ReplacementType(node->InputAt(2));
713   replacements_[node->id()].type = rep_type;
714   const Operator* store_op;
715   MachineRepresentation rep;
716   switch (node->opcode()) {
717     case IrOpcode::kStore: {
718       rep = StoreRepresentationOf(node->op()).representation();
719       WriteBarrierKind write_barrier_kind =
720           StoreRepresentationOf(node->op()).write_barrier_kind();
721       store_op = machine()->Store(StoreRepresentation(
722           MachineTypeFrom(rep_type).representation(), write_barrier_kind));
723       break;
724     }
725     case IrOpcode::kUnalignedStore: {
726       rep = UnalignedStoreRepresentationOf(node->op());
727       store_op =
728           machine()->UnalignedStore(MachineTypeFrom(rep_type).representation());
729       break;
730     }
731     case IrOpcode::kProtectedStore: {
732       rep = StoreRepresentationOf(node->op()).representation();
733       store_op =
734           machine()->ProtectedStore(MachineTypeFrom(rep_type).representation());
735       break;
736     }
737     default:
738       UNREACHABLE();
739   }
740   if (rep == MachineRepresentation::kSimd128) {
741     Node* base = node->InputAt(0);
742     Node* index = node->InputAt(1);
743     int num_lanes = NumLanes(rep_type);
744     Node** indices = zone()->NewArray<Node*>(num_lanes);
745     GetIndexNodes(index, indices, rep_type);
746     Node* value = node->InputAt(2);
747     DCHECK(HasReplacement(1, value));
748     Node** rep_nodes = zone()->NewArray<Node*>(num_lanes);
749     rep_nodes[0] = node;
750     Node** rep_inputs = GetReplacementsWithType(value, rep_type);
751     rep_nodes[0]->ReplaceInput(2, rep_inputs[0]);
752     rep_nodes[0]->ReplaceInput(1, indices[0]);
753     NodeProperties::ChangeOp(node, store_op);
754     if (node->InputCount() > 3) {
755       DCHECK_LT(4, node->InputCount());
756       Node* effect_input = node->InputAt(3);
757       Node* control_input = node->InputAt(4);
758       for (int i = num_lanes - 1; i > 0; --i) {
759         rep_nodes[i] =
760             graph()->NewNode(store_op, base, indices[i], rep_inputs[i],
761                              effect_input, control_input);
762         effect_input = rep_nodes[i];
763       }
764       rep_nodes[0]->ReplaceInput(3, rep_nodes[1]);
765     } else {
766       for (int i = 1; i < num_lanes; ++i) {
767         rep_nodes[i] =
768             graph()->NewNode(store_op, base, indices[i], rep_inputs[i]);
769       }
770     }
771     ReplaceNode(node, rep_nodes, num_lanes);
772   } else {
773     DefaultLowering(node);
774   }
775 }
776 
LowerBinaryOp(Node * node,SimdType input_rep_type,const Operator * op,bool not_horizontal)777 void SimdScalarLowering::LowerBinaryOp(Node* node, SimdType input_rep_type,
778                                        const Operator* op,
779                                        bool not_horizontal) {
780   DCHECK_EQ(2, node->InputCount());
781   Node** rep_left = GetReplacementsWithType(node->InputAt(0), input_rep_type);
782   Node** rep_right = GetReplacementsWithType(node->InputAt(1), input_rep_type);
783   int num_lanes = NumLanes(input_rep_type);
784   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
785   if (not_horizontal) {
786     for (int i = 0; i < num_lanes; ++i) {
787       rep_node[i] = graph()->NewNode(op, rep_left[i], rep_right[i]);
788     }
789   } else {
790     for (int i = 0; i < num_lanes / 2; ++i) {
791       rep_node[i] = graph()->NewNode(op, rep_left[i * 2], rep_left[i * 2 + 1]);
792       rep_node[i + num_lanes / 2] =
793           graph()->NewNode(op, rep_right[i * 2], rep_right[i * 2 + 1]);
794     }
795   }
796   ReplaceNode(node, rep_node, num_lanes);
797 }
798 
LowerCompareOp(Node * node,SimdType input_rep_type,const Operator * op,bool invert_inputs)799 void SimdScalarLowering::LowerCompareOp(Node* node, SimdType input_rep_type,
800                                         const Operator* op,
801                                         bool invert_inputs) {
802   DCHECK_EQ(2, node->InputCount());
803   Node** rep_left = GetReplacementsWithType(node->InputAt(0), input_rep_type);
804   Node** rep_right = GetReplacementsWithType(node->InputAt(1), input_rep_type);
805   int num_lanes = NumLanes(input_rep_type);
806   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
807   for (int i = 0; i < num_lanes; ++i) {
808     Node* cmp_result = nullptr;
809     if (invert_inputs) {
810       cmp_result = graph()->NewNode(op, rep_right[i], rep_left[i]);
811     } else {
812       cmp_result = graph()->NewNode(op, rep_left[i], rep_right[i]);
813     }
814     Diamond d_cmp(graph(), common(), cmp_result);
815     rep_node[i] = ConstructPhiForComparison(d_cmp, input_rep_type, -1, 0);
816   }
817   ReplaceNode(node, rep_node, num_lanes);
818 }
819 
FixUpperBits(Node * input,int32_t shift)820 Node* SimdScalarLowering::FixUpperBits(Node* input, int32_t shift) {
821   return graph()->NewNode(machine()->Word32Sar(),
822                           graph()->NewNode(machine()->Word32Shl(), input,
823                                            mcgraph_->Int32Constant(shift)),
824                           mcgraph_->Int32Constant(shift));
825 }
826 
LowerBinaryOpForSmallInt(Node * node,SimdType input_rep_type,const Operator * op,bool not_horizontal)827 void SimdScalarLowering::LowerBinaryOpForSmallInt(Node* node,
828                                                   SimdType input_rep_type,
829                                                   const Operator* op,
830                                                   bool not_horizontal) {
831   DCHECK_EQ(2, node->InputCount());
832   DCHECK(input_rep_type == SimdType::kInt16x8 ||
833          input_rep_type == SimdType::kInt8x16);
834   Node** rep_left = GetReplacementsWithType(node->InputAt(0), input_rep_type);
835   Node** rep_right = GetReplacementsWithType(node->InputAt(1), input_rep_type);
836   int num_lanes = NumLanes(input_rep_type);
837   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
838   int32_t shift_val =
839       (input_rep_type == SimdType::kInt16x8) ? kShift16 : kShift8;
840   if (not_horizontal) {
841     for (int i = 0; i < num_lanes; ++i) {
842       rep_node[i] = FixUpperBits(
843           graph()->NewNode(op, rep_left[i], rep_right[i]), shift_val);
844     }
845   } else {
846     for (int i = 0; i < num_lanes / 2; ++i) {
847       rep_node[i] = FixUpperBits(
848           graph()->NewNode(op, rep_left[i * 2], rep_left[i * 2 + 1]),
849           shift_val);
850       rep_node[i + num_lanes / 2] = FixUpperBits(
851           graph()->NewNode(op, rep_right[i * 2], rep_right[i * 2 + 1]),
852           shift_val);
853     }
854   }
855   ReplaceNode(node, rep_node, num_lanes);
856 }
857 
Mask(Node * input,int32_t mask)858 Node* SimdScalarLowering::Mask(Node* input, int32_t mask) {
859   return graph()->NewNode(machine()->Word32And(), input,
860                           mcgraph_->Int32Constant(mask));
861 }
862 
LowerSaturateBinaryOp(Node * node,SimdType input_rep_type,const Operator * op,bool is_signed)863 void SimdScalarLowering::LowerSaturateBinaryOp(Node* node,
864                                                SimdType input_rep_type,
865                                                const Operator* op,
866                                                bool is_signed) {
867   DCHECK_EQ(2, node->InputCount());
868   DCHECK(input_rep_type == SimdType::kInt16x8 ||
869          input_rep_type == SimdType::kInt8x16);
870   Node** rep_left = GetReplacementsWithType(node->InputAt(0), input_rep_type);
871   Node** rep_right = GetReplacementsWithType(node->InputAt(1), input_rep_type);
872   int32_t min = 0;
873   int32_t max = 0;
874   int32_t mask = 0;
875   int32_t shift_val = 0;
876   MachineRepresentation phi_rep;
877   if (input_rep_type == SimdType::kInt16x8) {
878     if (is_signed) {
879       min = std::numeric_limits<int16_t>::min();
880       max = std::numeric_limits<int16_t>::max();
881     } else {
882       min = std::numeric_limits<uint16_t>::min();
883       max = std::numeric_limits<uint16_t>::max();
884     }
885     mask = kMask16;
886     shift_val = kShift16;
887     phi_rep = MachineRepresentation::kWord16;
888   } else {
889     if (is_signed) {
890       min = std::numeric_limits<int8_t>::min();
891       max = std::numeric_limits<int8_t>::max();
892     } else {
893       min = std::numeric_limits<uint8_t>::min();
894       max = std::numeric_limits<uint8_t>::max();
895     }
896     mask = kMask8;
897     shift_val = kShift8;
898     phi_rep = MachineRepresentation::kWord8;
899   }
900   int num_lanes = NumLanes(input_rep_type);
901   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
902   for (int i = 0; i < num_lanes; ++i) {
903     Node* op_result = nullptr;
904     Node* left = is_signed ? rep_left[i] : Mask(rep_left[i], mask);
905     Node* right = is_signed ? rep_right[i] : Mask(rep_right[i], mask);
906     op_result = graph()->NewNode(op, left, right);
907     Diamond d_min(graph(), common(),
908                   graph()->NewNode(machine()->Int32LessThan(), op_result,
909                                    mcgraph_->Int32Constant(min)));
910     rep_node[i] = d_min.Phi(phi_rep, mcgraph_->Int32Constant(min), op_result);
911     Diamond d_max(graph(), common(),
912                   graph()->NewNode(machine()->Int32LessThan(),
913                                    mcgraph_->Int32Constant(max), rep_node[i]));
914     rep_node[i] = d_max.Phi(phi_rep, mcgraph_->Int32Constant(max), rep_node[i]);
915     rep_node[i] =
916         is_signed ? rep_node[i] : FixUpperBits(rep_node[i], shift_val);
917   }
918   ReplaceNode(node, rep_node, num_lanes);
919 }
920 
LowerUnaryOp(Node * node,SimdType input_rep_type,const Operator * op)921 void SimdScalarLowering::LowerUnaryOp(Node* node, SimdType input_rep_type,
922                                       const Operator* op) {
923   DCHECK_EQ(1, node->InputCount());
924   Node** rep = GetReplacementsWithType(node->InputAt(0), input_rep_type);
925   int num_lanes = NumLanes(input_rep_type);
926   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
927   for (int i = 0; i < num_lanes; ++i) {
928     rep_node[i] = graph()->NewNode(op, rep[i]);
929   }
930   ReplaceNode(node, rep_node, num_lanes);
931 }
932 
LowerIntMinMax(Node * node,const Operator * op,bool is_max,SimdType type)933 void SimdScalarLowering::LowerIntMinMax(Node* node, const Operator* op,
934                                         bool is_max, SimdType type) {
935   DCHECK_EQ(2, node->InputCount());
936   Node** rep_left = GetReplacementsWithType(node->InputAt(0), type);
937   Node** rep_right = GetReplacementsWithType(node->InputAt(1), type);
938   int num_lanes = NumLanes(type);
939   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
940   MachineRepresentation rep = MachineRepresentation::kNone;
941   if (type == SimdType::kInt32x4) {
942     rep = MachineRepresentation::kWord32;
943   } else if (type == SimdType::kInt16x8) {
944     rep = MachineRepresentation::kWord16;
945   } else if (type == SimdType::kInt8x16) {
946     rep = MachineRepresentation::kWord8;
947   } else {
948     UNREACHABLE();
949   }
950   for (int i = 0; i < num_lanes; ++i) {
951     Diamond d(graph(), common(),
952               graph()->NewNode(op, rep_left[i], rep_right[i]));
953     if (is_max) {
954       rep_node[i] = d.Phi(rep, rep_right[i], rep_left[i]);
955     } else {
956       rep_node[i] = d.Phi(rep, rep_left[i], rep_right[i]);
957     }
958   }
959   ReplaceNode(node, rep_node, num_lanes);
960 }
961 
BuildF64Trunc(Node * input)962 Node* SimdScalarLowering::BuildF64Trunc(Node* input) {
963   if (machine()->Float64RoundTruncate().IsSupported()) {
964     return graph()->NewNode(machine()->Float64RoundTruncate().op(), input);
965   } else {
966     ExternalReference ref = ExternalReference::wasm_f64_trunc();
967     Node* stack_slot =
968         graph()->NewNode(machine()->StackSlot(MachineRepresentation::kFloat64));
969     const Operator* store_op = machine()->Store(
970         StoreRepresentation(MachineRepresentation::kFloat64, kNoWriteBarrier));
971     Node* effect =
972         graph()->NewNode(store_op, stack_slot, mcgraph_->Int32Constant(0),
973                          input, graph()->start(), graph()->start());
974     Node* function = graph()->NewNode(common()->ExternalConstant(ref));
975     Node** args = zone()->NewArray<Node*>(4);
976     args[0] = function;
977     args[1] = stack_slot;
978     args[2] = effect;
979     args[3] = graph()->start();
980     Signature<MachineType>::Builder sig_builder(zone(), 0, 1);
981     sig_builder.AddParam(MachineType::Pointer());
982     auto call_descriptor =
983         Linkage::GetSimplifiedCDescriptor(zone(), sig_builder.Build());
984     Node* call = graph()->NewNode(common()->Call(call_descriptor), 4, args);
985     return graph()->NewNode(machine()->Load(LoadRepresentation::Float64()),
986                             stack_slot, mcgraph_->Int32Constant(0), call,
987                             graph()->start());
988   }
989 }
990 
LowerConvertFromFloat(Node * node,bool is_signed)991 void SimdScalarLowering::LowerConvertFromFloat(Node* node, bool is_signed) {
992   DCHECK_EQ(1, node->InputCount());
993   Node** rep = GetReplacementsWithType(node->InputAt(0), SimdType::kFloat32x4);
994   Node* rep_node[kNumLanes32];
995   Node* double_zero = graph()->NewNode(common()->Float64Constant(0.0));
996   Node* min = graph()->NewNode(
997       common()->Float64Constant(static_cast<double>(is_signed ? kMinInt : 0)));
998   Node* max = graph()->NewNode(common()->Float64Constant(
999       static_cast<double>(is_signed ? kMaxInt : 0xFFFFFFFFu)));
1000   for (int i = 0; i < kNumLanes32; ++i) {
1001     Node* double_rep =
1002         graph()->NewNode(machine()->ChangeFloat32ToFloat64(), rep[i]);
1003     Diamond nan_d(
1004         graph(), common(),
1005         graph()->NewNode(machine()->Float64Equal(), double_rep, double_rep));
1006     Node* temp =
1007         nan_d.Phi(MachineRepresentation::kFloat64, double_rep, double_zero);
1008     Diamond min_d(graph(), common(),
1009                   graph()->NewNode(machine()->Float64LessThan(), temp, min));
1010     temp = min_d.Phi(MachineRepresentation::kFloat64, min, temp);
1011     Diamond max_d(graph(), common(),
1012                   graph()->NewNode(machine()->Float64LessThan(), max, temp));
1013     temp = max_d.Phi(MachineRepresentation::kFloat64, max, temp);
1014     Node* trunc = BuildF64Trunc(temp);
1015     if (is_signed) {
1016       rep_node[i] = graph()->NewNode(machine()->ChangeFloat64ToInt32(), trunc);
1017     } else {
1018       rep_node[i] =
1019           graph()->NewNode(machine()->TruncateFloat64ToUint32(), trunc);
1020     }
1021   }
1022   ReplaceNode(node, rep_node, kNumLanes32);
1023 }
1024 
LowerConvertFromInt(Node * node,SimdType input_rep_type,SimdType output_rep_type,bool is_signed,int start_index)1025 void SimdScalarLowering::LowerConvertFromInt(Node* node,
1026                                              SimdType input_rep_type,
1027                                              SimdType output_rep_type,
1028                                              bool is_signed, int start_index) {
1029   DCHECK_EQ(1, node->InputCount());
1030   Node** rep = GetReplacementsWithType(node->InputAt(0), input_rep_type);
1031 
1032   int32_t mask = 0;
1033   if (input_rep_type == SimdType::kInt16x8) {
1034     DCHECK_EQ(output_rep_type, SimdType::kInt32x4);
1035     mask = kMask16;
1036   } else {
1037     DCHECK_EQ(output_rep_type, SimdType::kInt16x8);
1038     DCHECK_EQ(input_rep_type, SimdType::kInt8x16);
1039     mask = kMask8;
1040   }
1041 
1042   int num_lanes = NumLanes(output_rep_type);
1043   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1044   for (int i = 0; i < num_lanes; ++i) {
1045     rep_node[i] =
1046         is_signed ? rep[i + start_index] : Mask(rep[i + start_index], mask);
1047   }
1048 
1049   ReplaceNode(node, rep_node, num_lanes);
1050 }
1051 
LowerPack(Node * node,SimdType input_rep_type,SimdType output_rep_type,bool is_signed)1052 void SimdScalarLowering::LowerPack(Node* node, SimdType input_rep_type,
1053                                    SimdType output_rep_type, bool is_signed) {
1054   DCHECK_EQ(2, node->InputCount());
1055   Node** rep_left = GetReplacementsWithType(node->InputAt(0), input_rep_type);
1056   Node** rep_right = GetReplacementsWithType(node->InputAt(1), input_rep_type);
1057   const Operator* less_op = machine()->Int32LessThan();
1058   Node* min = nullptr;
1059   Node* max = nullptr;
1060   const Operator* sign_extend;
1061   MachineRepresentation phi_rep;
1062   if (output_rep_type == SimdType::kInt16x8) {
1063     sign_extend = machine()->SignExtendWord16ToInt32();
1064     DCHECK(input_rep_type == SimdType::kInt32x4);
1065     if (is_signed) {
1066       min = mcgraph_->Int32Constant(std::numeric_limits<int16_t>::min());
1067       max = mcgraph_->Int32Constant(std::numeric_limits<int16_t>::max());
1068     } else {
1069       min = mcgraph_->Uint32Constant(std::numeric_limits<uint16_t>::min());
1070       max = mcgraph_->Uint32Constant(std::numeric_limits<uint16_t>::max());
1071     }
1072     phi_rep = MachineRepresentation::kWord16;
1073   } else {
1074     sign_extend = machine()->SignExtendWord8ToInt32();
1075     DCHECK(output_rep_type == SimdType::kInt8x16 &&
1076            input_rep_type == SimdType::kInt16x8);
1077     if (is_signed) {
1078       min = mcgraph_->Int32Constant(std::numeric_limits<int8_t>::min());
1079       max = mcgraph_->Int32Constant(std::numeric_limits<int8_t>::max());
1080     } else {
1081       min = mcgraph_->Uint32Constant(std::numeric_limits<uint8_t>::min());
1082       max = mcgraph_->Uint32Constant(std::numeric_limits<uint8_t>::max());
1083     }
1084     phi_rep = MachineRepresentation::kWord8;
1085   }
1086   int num_lanes = NumLanes(output_rep_type);
1087   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1088   for (int i = 0; i < num_lanes; ++i) {
1089     Node* input = nullptr;
1090     if (i < num_lanes / 2)
1091       input = rep_left[i];
1092     else
1093       input = rep_right[i - num_lanes / 2];
1094     Diamond d_min(graph(), common(), graph()->NewNode(less_op, input, min));
1095     input = d_min.Phi(phi_rep, min, input);
1096     Diamond d_max(graph(), common(), graph()->NewNode(less_op, max, input));
1097     // We keep nodes in sign-extended form. E.g. for uint8_t, we need to
1098     // compare with 0x000000ff (saturated narrowing), but the result of
1099     // conversion should be 0xffffffff to work well with the rest of lowering.
1100     rep_node[i] = graph()->NewNode(sign_extend, d_max.Phi(phi_rep, max, input));
1101   }
1102   ReplaceNode(node, rep_node, num_lanes);
1103 }
1104 
LowerShiftOp(Node * node,SimdType type)1105 void SimdScalarLowering::LowerShiftOp(Node* node, SimdType type) {
1106   DCHECK_EQ(2, node->InputCount());
1107 
1108   // The shift node, if it has a replacement, should be a single scalar.
1109   DCHECK_GE(1, ReplacementCount(node->InputAt(1)));
1110   Node* val = (HasReplacement(0, node->InputAt(1)))
1111                   ? GetReplacements(node->InputAt(1))[0]
1112                   : node->InputAt(1);
1113 
1114   Node* shift_node = Mask(val, GetMaskForShift(node));
1115   Node** rep = GetReplacementsWithType(node->InputAt(0), type);
1116   int num_lanes = NumLanes(type);
1117   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1118   for (int i = 0; i < num_lanes; ++i) {
1119     rep_node[i] = rep[i];
1120     switch (node->opcode()) {
1121       case IrOpcode::kI8x16ShrU:
1122         rep_node[i] = Mask(rep_node[i], kMask8);
1123         rep_node[i] =
1124             graph()->NewNode(machine()->Word32Shr(), rep_node[i], shift_node);
1125         break;
1126       case IrOpcode::kI16x8ShrU:
1127         rep_node[i] = Mask(rep_node[i], kMask16);
1128         V8_FALLTHROUGH;
1129       case IrOpcode::kI32x4ShrU:
1130         rep_node[i] =
1131             graph()->NewNode(machine()->Word32Shr(), rep_node[i], shift_node);
1132         break;
1133       case IrOpcode::kI64x2ShrU:
1134         rep_node[i] =
1135             graph()->NewNode(machine()->Word64Shr(), rep_node[i], shift_node);
1136         break;
1137       case IrOpcode::kI64x2Shl:
1138         rep_node[i] =
1139             graph()->NewNode(machine()->Word64Shl(), rep_node[i], shift_node);
1140         break;
1141       case IrOpcode::kI32x4Shl:
1142         rep_node[i] =
1143             graph()->NewNode(machine()->Word32Shl(), rep_node[i], shift_node);
1144         break;
1145       case IrOpcode::kI16x8Shl:
1146         rep_node[i] =
1147             graph()->NewNode(machine()->Word32Shl(), rep_node[i], shift_node);
1148         rep_node[i] = FixUpperBits(rep_node[i], kShift16);
1149         break;
1150       case IrOpcode::kI8x16Shl:
1151         rep_node[i] =
1152             graph()->NewNode(machine()->Word32Shl(), rep_node[i], shift_node);
1153         rep_node[i] = FixUpperBits(rep_node[i], kShift8);
1154         break;
1155       case IrOpcode::kI64x2ShrS:
1156         rep_node[i] =
1157             graph()->NewNode(machine()->Word64Sar(), rep_node[i], shift_node);
1158         break;
1159       case IrOpcode::kI32x4ShrS:
1160       case IrOpcode::kI16x8ShrS:
1161       case IrOpcode::kI8x16ShrS:
1162         rep_node[i] =
1163             graph()->NewNode(machine()->Word32Sar(), rep_node[i], shift_node);
1164         break;
1165       default:
1166         UNREACHABLE();
1167     }
1168   }
1169   ReplaceNode(node, rep_node, num_lanes);
1170 }
1171 
ConstructPhiForComparison(Diamond d,SimdType rep_type,int true_value,int false_value)1172 Node* SimdScalarLowering::ConstructPhiForComparison(Diamond d,
1173                                                     SimdType rep_type,
1174                                                     int true_value,
1175                                                     int false_value) {
1176   // Close the given Diamond d using a Phi node, taking care of constructing the
1177   // right kind of constants (Int32 or Int64) based on rep_type.
1178   if (rep_type == SimdType::kFloat64x2) {
1179     MachineRepresentation rep = MachineRepresentation::kWord64;
1180     return d.Phi(rep, mcgraph_->Int64Constant(true_value),
1181                  mcgraph_->Int64Constant(false_value));
1182   } else {
1183     MachineRepresentation rep =
1184         (rep_type == SimdType::kFloat32x4)
1185             ? MachineRepresentation::kWord32
1186             : MachineTypeFrom(rep_type).representation();
1187     return d.Phi(rep, mcgraph_->Int32Constant(true_value),
1188                  mcgraph_->Int32Constant(false_value));
1189   }
1190 }
1191 
LowerNotEqual(Node * node,SimdType input_rep_type,const Operator * op)1192 void SimdScalarLowering::LowerNotEqual(Node* node, SimdType input_rep_type,
1193                                        const Operator* op) {
1194   DCHECK_EQ(2, node->InputCount());
1195   Node** rep_left = GetReplacementsWithType(node->InputAt(0), input_rep_type);
1196   Node** rep_right = GetReplacementsWithType(node->InputAt(1), input_rep_type);
1197   int num_lanes = NumLanes(input_rep_type);
1198   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1199   for (int i = 0; i < num_lanes; ++i) {
1200     Diamond d(graph(), common(),
1201               graph()->NewNode(op, rep_left[i], rep_right[i]));
1202     rep_node[i] = ConstructPhiForComparison(d, input_rep_type, 0, -1);
1203   }
1204   ReplaceNode(node, rep_node, num_lanes);
1205 }
1206 
LowerBitMaskOp(Node * node,SimdType rep_type,int msb_index)1207 void SimdScalarLowering::LowerBitMaskOp(Node* node, SimdType rep_type,
1208                                         int msb_index) {
1209   Node** reps = GetReplacementsWithType(node->InputAt(0), rep_type);
1210   int num_lanes = NumLanes(rep_type);
1211   Node** rep_node = zone()->NewArray<Node*>(1);
1212   Node* result = mcgraph_->Int32Constant(0);
1213   uint32_t mask = 1 << msb_index;
1214 
1215   for (int i = 0; i < num_lanes; ++i) {
1216     // Lane i should end up at bit i in the final result.
1217     // +-----------------------------------------------------------------+
1218     // |       | msb_index |   (i < msb_index)    |    (i > msb_index)   |
1219     // +-------+-----------+----------------------+----------------------+
1220     // | i8x16 |     7     | msb >> (msb_index-i) | msb << (i-msb_index) |
1221     // | i16x8 |    15     | msb >> (msb_index-i) |         n/a          |
1222     // | i32x4 |    31     | msb >> (msb_index-i) |         n/a          |
1223     // +-------+-----------+----------------------+----------------------+
1224     Node* msb = Mask(reps[i], mask);
1225 
1226     if (i < msb_index) {
1227       int shift = msb_index - i;
1228       Node* shifted = graph()->NewNode(machine()->Word32Shr(), msb,
1229                                        mcgraph_->Int32Constant(shift));
1230       result = graph()->NewNode(machine()->Word32Or(), shifted, result);
1231     } else if (i > msb_index) {
1232       int shift = i - msb_index;
1233       Node* shifted = graph()->NewNode(machine()->Word32Shl(), msb,
1234                                        mcgraph_->Int32Constant(shift));
1235       result = graph()->NewNode(machine()->Word32Or(), shifted, result);
1236     } else {
1237       result = graph()->NewNode(machine()->Word32Or(), msb, result);
1238     }
1239   }
1240 
1241   rep_node[0] = result;
1242   ReplaceNode(node, rep_node, 1);
1243 }
1244 
LowerAllTrueOp(Node * node,SimdType rep_type)1245 void SimdScalarLowering::LowerAllTrueOp(Node* node, SimdType rep_type) {
1246   // AllTrue ops require the input to be of a particular SimdType, but the op
1247   // itself is always replaced by a Int32x4 with 1 node.
1248   int num_lanes = NumLanes(rep_type);
1249   DCHECK_EQ(1, node->InputCount());
1250   Node** rep = GetReplacementsWithType(node->InputAt(0), rep_type);
1251 
1252   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1253   Node* zero = mcgraph_->Int32Constant(0);
1254   Node* tmp_result = mcgraph_->Int32Constant(1);
1255   for (int i = 0; i < num_lanes; ++i) {
1256     Diamond d(graph(), common(),
1257               graph()->NewNode(machine()->Word32Equal(), rep[i], zero));
1258     tmp_result = d.Phi(MachineRepresentation::kWord32, zero, tmp_result);
1259   }
1260   rep_node[0] = tmp_result;
1261   ReplaceNode(node, rep_node, 1);
1262 }
1263 
LowerFloatPseudoMinMax(Node * node,const Operator * op,bool is_max,SimdType type)1264 void SimdScalarLowering::LowerFloatPseudoMinMax(Node* node, const Operator* op,
1265                                                 bool is_max, SimdType type) {
1266   DCHECK_EQ(2, node->InputCount());
1267   Node** rep_left = GetReplacementsWithType(node->InputAt(0), type);
1268   Node** rep_right = GetReplacementsWithType(node->InputAt(1), type);
1269   int num_lanes = NumLanes(type);
1270   Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1271   MachineRepresentation rep = MachineTypeFrom(type).representation();
1272   for (int i = 0; i < num_lanes; ++i) {
1273     Node* cmp = is_max ? graph()->NewNode(op, rep_left[i], rep_right[i])
1274                        : graph()->NewNode(op, rep_right[i], rep_left[i]);
1275     Diamond d(graph(), common(), cmp);
1276     rep_node[i] = d.Phi(rep, rep_right[i], rep_left[i]);
1277   }
1278   ReplaceNode(node, rep_node, num_lanes);
1279 }
1280 
LowerNode(Node * node)1281 void SimdScalarLowering::LowerNode(Node* node) {
1282   SimdType rep_type = ReplacementType(node);
1283   int num_lanes = NumLanes(rep_type);
1284   switch (node->opcode()) {
1285     case IrOpcode::kS128Const: {
1286       // We could use GetReplacementsWithType for all this, but it adds a lot of
1287       // nodes, so sign extend the immediates ourselves here.
1288       DCHECK_EQ(0, node->InputCount());
1289       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1290       S128ImmediateParameter params = S128ImmediateParameterOf(node->op());
1291 
1292       // For all the small ints below, we have a choice of static_cast or bit
1293       // twiddling, clang seems to be able to optimize either
1294       // (https://godbolt.org/z/9c65o8) so use static_cast for clarity.
1295       switch (rep_type) {
1296         case SimdType::kInt8x16: {
1297           for (int i = 0; i < num_lanes; ++i) {
1298             Address data_address = reinterpret_cast<Address>(params.data() + i);
1299             rep_node[i] = mcgraph_->Int32Constant(static_cast<int32_t>(
1300                 base::ReadLittleEndianValue<int8_t>(data_address)));
1301           }
1302           break;
1303         }
1304         case SimdType::kInt16x8: {
1305           int16_t val[kNumLanes16];
1306           memcpy(val, params.data(), kSimd128Size);
1307           for (int i = 0; i < num_lanes; ++i) {
1308             rep_node[i] = mcgraph_->Int32Constant(static_cast<int32_t>(
1309                 base::ReadLittleEndianValue<int16_t>(&val[i])));
1310           }
1311           break;
1312         }
1313         case SimdType::kInt32x4: {
1314           uint32_t val[kNumLanes32];
1315           memcpy(val, params.data(), kSimd128Size);
1316           for (int i = 0; i < num_lanes; ++i) {
1317             rep_node[i] = mcgraph_->Int32Constant(
1318                 base::ReadLittleEndianValue<uint32_t>(&val[i]));
1319           }
1320           break;
1321         }
1322         case SimdType::kInt64x2: {
1323           uint64_t val[kNumLanes64];
1324           memcpy(val, params.data(), kSimd128Size);
1325           for (int i = 0; i < num_lanes; ++i) {
1326             rep_node[i] = mcgraph_->Int64Constant(
1327                 base::ReadLittleEndianValue<uint64_t>(&val[i]));
1328           }
1329           break;
1330         }
1331         case SimdType::kFloat32x4: {
1332           float val[kNumLanes32];
1333           memcpy(val, params.data(), kSimd128Size);
1334           for (int i = 0; i < num_lanes; ++i) {
1335             rep_node[i] = mcgraph_->Float32Constant(
1336                 base::ReadLittleEndianValue<float>(&val[i]));
1337           }
1338           break;
1339         }
1340         case SimdType::kFloat64x2: {
1341           double val[kNumLanes64];
1342           memcpy(val, params.data(), kSimd128Size);
1343           for (int i = 0; i < num_lanes; ++i) {
1344             rep_node[i] = mcgraph_->Float64Constant(
1345                 base::ReadLittleEndianValue<double>(&val[i]));
1346           }
1347           break;
1348         }
1349       }
1350       ReplaceNode(node, rep_node, num_lanes);
1351       break;
1352     }
1353     case IrOpcode::kStart: {
1354       int parameter_count = GetParameterCountAfterLowering();
1355       // Only exchange the node if the parameter count actually changed.
1356       if (parameter_count != static_cast<int>(signature()->parameter_count())) {
1357         int delta =
1358             parameter_count - static_cast<int>(signature()->parameter_count());
1359         int new_output_count = node->op()->ValueOutputCount() + delta;
1360         NodeProperties::ChangeOp(node, common()->Start(new_output_count));
1361       }
1362       break;
1363     }
1364     case IrOpcode::kParameter: {
1365       DCHECK_EQ(1, node->InputCount());
1366       int param_count = static_cast<int>(signature()->parameter_count());
1367       // Only exchange the node if the parameter count actually changed. We do
1368       // not even have to do the default lowering because the start node,
1369       // the only input of a parameter node, only changes if the parameter count
1370       // changes.
1371       if (GetParameterCountAfterLowering() != param_count) {
1372         int old_index = ParameterIndexOf(node->op());
1373         // Parameter index 0 is the instance parameter, we will use old_index to
1374         // index into the function signature, so we need to decrease it by 1.
1375         --old_index;
1376         int new_index =
1377             GetParameterIndexAfterLoweringSimd128(signature(), old_index);
1378         // Similarly, the index into function signature needs to account for the
1379         // instance parameter, so increase it by 1.
1380         ++new_index;
1381         NodeProperties::ChangeOp(node, common()->Parameter(new_index));
1382 
1383         if (old_index < 0) {
1384           break;
1385         }
1386 
1387         DCHECK(old_index < param_count);
1388 
1389         if (signature()->GetParam(old_index) ==
1390             MachineRepresentation::kSimd128) {
1391           Node* new_node[kNumLanes32];
1392           new_node[0] = node;
1393           for (int i = 1; i < kNumLanes32; ++i) {
1394             new_node[i] = graph()->NewNode(common()->Parameter(new_index + i),
1395                                            graph()->start());
1396           }
1397           ReplaceNode(node, new_node, kNumLanes32);
1398         }
1399       }
1400       break;
1401     }
1402     case IrOpcode::kSimd128ReverseBytes: {
1403       DCHECK_EQ(1, node->InputCount());
1404       SimdType input_type = ReplacementType(node->InputAt(0));
1405       bool is_float = input_type == SimdType::kFloat32x4 ||
1406                       input_type == SimdType::kFloat64x2;
1407       replacements_[node->id()].type =
1408           is_float ? SimdType::kFloat32x4 : SimdType::kInt32x4;
1409       Node** rep = GetReplacementsWithType(
1410           node->InputAt(0),
1411           is_float ? SimdType::kFloat32x4 : SimdType::kInt32x4);
1412       Node* rep_node[kNumLanes32];
1413       for (int i = 0; i < kNumLanes32; ++i) {
1414         Node* temp = is_float ? graph()->NewNode(
1415                                     machine()->BitcastFloat32ToInt32(), rep[i])
1416                               : rep[i];
1417         temp = graph()->NewNode(machine()->Word32ReverseBytes(), temp);
1418         rep_node[kNumLanes32 - 1 - i] =
1419             is_float
1420                 ? graph()->NewNode(machine()->BitcastInt32ToFloat32(), temp)
1421                 : temp;
1422       }
1423       ReplaceNode(node, rep_node, kNumLanes32);
1424       break;
1425     }
1426     case IrOpcode::kLoad:
1427     case IrOpcode::kUnalignedLoad:
1428     case IrOpcode::kProtectedLoad: {
1429       LowerLoadOp(node, rep_type);
1430       break;
1431     }
1432     case IrOpcode::kLoadTransform: {
1433       LowerLoadTransformOp(node, rep_type);
1434       break;
1435     }
1436     case IrOpcode::kStore:
1437     case IrOpcode::kUnalignedStore:
1438     case IrOpcode::kProtectedStore: {
1439       LowerStoreOp(node);
1440       break;
1441     }
1442     case IrOpcode::kReturn: {
1443       int old_input_count = node->InputCount();
1444       int return_arity = static_cast<int>(signature()->return_count());
1445       for (int i = 0; i < return_arity; i++) {
1446         if (signature()->GetReturn(i) != MachineRepresentation::kSimd128) {
1447           continue;
1448         }
1449 
1450         // Return nodes have a hidden input at value 0.
1451         Node* input = node->InputAt(i + 1);
1452         if (!HasReplacement(0, input)) {
1453           continue;
1454         }
1455 
1456         // V128 return types are lowered to i32x4.
1457         Node** reps = GetReplacementsWithType(input, rep_type);
1458         ReplaceNode(input, reps, NumLanes(rep_type));
1459       }
1460 
1461       DefaultLowering(node);
1462       // Nothing needs to be done here since inputs did not change.
1463       if (old_input_count == node->InputCount()) {
1464         break;
1465       }
1466 
1467       int new_return_count = GetReturnCountAfterLoweringSimd128(signature());
1468       if (static_cast<int>(signature()->return_count()) != new_return_count) {
1469         NodeProperties::ChangeOp(node, common()->Return(new_return_count));
1470       }
1471       break;
1472     }
1473     case IrOpcode::kCall: {
1474       // TODO(turbofan): Make wasm code const-correct wrt. CallDescriptor.
1475       auto call_descriptor =
1476           const_cast<CallDescriptor*>(CallDescriptorOf(node->op()));
1477       bool returns_require_lowering =
1478           GetReturnCountAfterLoweringSimd128(call_descriptor) !=
1479           static_cast<int>(call_descriptor->ReturnCount());
1480 
1481       // All call arguments are lowered to i32x4 in the call descriptor, so the
1482       // arguments need to be converted to i32x4 as well.
1483       for (int i = NodeProperties::PastValueIndex(node) - 1; i >= 0; i--) {
1484         Node* input = node->InputAt(i);
1485         if (ReplacementCount(input) == 1) {
1486           // Special case for extract lanes
1487           Node** reps = GetReplacements(input);
1488           ReplaceNode(input, reps, 1);
1489         } else if (HasReplacement(0, input)) {
1490           Node** reps = GetReplacementsWithType(input, SimdType::kInt32x4);
1491           ReplaceNode(input, reps, NumLanes(SimdType::kInt32x4));
1492         }
1493       }
1494 
1495       if (DefaultLowering(node) || returns_require_lowering) {
1496         // We have to adjust the call descriptor.
1497         const Operator* op = common()->Call(
1498             GetI32WasmCallDescriptorForSimd(zone(), call_descriptor));
1499         NodeProperties::ChangeOp(node, op);
1500       }
1501 
1502       if (!returns_require_lowering) {
1503         break;
1504       }
1505 
1506       size_t return_arity = call_descriptor->ReturnCount();
1507 
1508       if (return_arity == 1) {
1509         // We access the additional return values through projections.
1510         // Special case for return_arity 1, with multi-returns, we would have
1511         // already built projections for each return value, and will be handled
1512         // by the following code.
1513         Node* rep_node[kNumLanes32];
1514         for (int i = 0; i < kNumLanes32; ++i) {
1515           rep_node[i] =
1516               graph()->NewNode(common()->Projection(i), node, graph()->start());
1517         }
1518         ReplaceNode(node, rep_node, kNumLanes32);
1519         break;
1520       }
1521 
1522       ZoneVector<Node*> projections(return_arity, zone());
1523       NodeProperties::CollectValueProjections(node, projections.data(),
1524                                               return_arity);
1525 
1526       for (size_t old_index = 0, new_index = 0; old_index < return_arity;
1527            ++old_index, ++new_index) {
1528         Node* use_node = projections[old_index];
1529         DCHECK_EQ(ProjectionIndexOf(use_node->op()), old_index);
1530         DCHECK_EQ(GetReturnIndexAfterLowering(call_descriptor,
1531                                               static_cast<int>(old_index)),
1532                   static_cast<int>(new_index));
1533         if (new_index != old_index) {
1534           NodeProperties::ChangeOp(use_node, common()->Projection(new_index));
1535         }
1536         if (call_descriptor->GetReturnType(old_index).representation() ==
1537             MachineRepresentation::kSimd128) {
1538           Node* rep_node[kNumLanes32];
1539           for (int i = 0; i < kNumLanes32; ++i) {
1540             rep_node[i] = graph()->NewNode(common()->Projection(new_index + i),
1541                                            node, graph()->start());
1542           }
1543           ReplaceNode(use_node, rep_node, kNumLanes32);
1544           new_index += kNumLanes32 - 1;
1545         }
1546       }
1547       break;
1548     }
1549     case IrOpcode::kPhi: {
1550       MachineRepresentation rep = PhiRepresentationOf(node->op());
1551       if (rep == MachineRepresentation::kSimd128) {
1552         // The replacement nodes have already been created, we only have to
1553         // replace placeholder nodes.
1554         Node** rep_node = GetReplacements(node);
1555         for (int i = 0; i < node->op()->ValueInputCount(); ++i) {
1556           Node** rep_input =
1557               GetReplacementsWithType(node->InputAt(i), rep_type);
1558           for (int j = 0; j < num_lanes; j++) {
1559             rep_node[j]->ReplaceInput(i, rep_input[j]);
1560           }
1561         }
1562       } else {
1563         DefaultLowering(node);
1564       }
1565       break;
1566     }
1567     case IrOpcode::kI64x2Add: {
1568       LowerBinaryOp(node, rep_type, machine()->Int64Add());
1569       break;
1570     }
1571     case IrOpcode::kI64x2Sub: {
1572       LowerBinaryOp(node, rep_type, machine()->Int64Sub());
1573       break;
1574     }
1575     case IrOpcode::kI64x2Mul: {
1576       LowerBinaryOp(node, rep_type, machine()->Int64Mul());
1577       break;
1578     }
1579 #define I32X4_BINOP_CASE(opcode, instruction)                \
1580   case IrOpcode::opcode: {                                   \
1581     LowerBinaryOp(node, rep_type, machine()->instruction()); \
1582     break;                                                   \
1583   }
1584       I32X4_BINOP_CASE(kI32x4Add, Int32Add)
1585       I32X4_BINOP_CASE(kI32x4Sub, Int32Sub)
1586       I32X4_BINOP_CASE(kI32x4Mul, Int32Mul)
1587       I32X4_BINOP_CASE(kS128And, Word32And)
1588       I32X4_BINOP_CASE(kS128Or, Word32Or)
1589       I32X4_BINOP_CASE(kS128Xor, Word32Xor)
1590 #undef I32X4_BINOP_CASE
1591     case IrOpcode::kI32x4AddHoriz: {
1592       LowerBinaryOp(node, rep_type, machine()->Int32Add(), false);
1593       break;
1594     }
1595     case IrOpcode::kI16x8AddHoriz: {
1596       LowerBinaryOpForSmallInt(node, rep_type, machine()->Int32Add(), false);
1597       break;
1598     }
1599     case IrOpcode::kI16x8Add:
1600     case IrOpcode::kI8x16Add: {
1601       LowerBinaryOpForSmallInt(node, rep_type, machine()->Int32Add());
1602       break;
1603     }
1604     case IrOpcode::kI16x8Sub:
1605     case IrOpcode::kI8x16Sub: {
1606       LowerBinaryOpForSmallInt(node, rep_type, machine()->Int32Sub());
1607       break;
1608     }
1609     case IrOpcode::kI16x8Mul:
1610     case IrOpcode::kI8x16Mul: {
1611       LowerBinaryOpForSmallInt(node, rep_type, machine()->Int32Mul());
1612       break;
1613     }
1614     case IrOpcode::kI16x8AddSatS:
1615     case IrOpcode::kI8x16AddSatS: {
1616       LowerSaturateBinaryOp(node, rep_type, machine()->Int32Add(), true);
1617       break;
1618     }
1619     case IrOpcode::kI16x8SubSatS:
1620     case IrOpcode::kI8x16SubSatS: {
1621       LowerSaturateBinaryOp(node, rep_type, machine()->Int32Sub(), true);
1622       break;
1623     }
1624     case IrOpcode::kI16x8AddSatU:
1625     case IrOpcode::kI8x16AddSatU: {
1626       LowerSaturateBinaryOp(node, rep_type, machine()->Int32Add(), false);
1627       break;
1628     }
1629     case IrOpcode::kI16x8SubSatU:
1630     case IrOpcode::kI8x16SubSatU: {
1631       LowerSaturateBinaryOp(node, rep_type, machine()->Int32Sub(), false);
1632       break;
1633     }
1634     case IrOpcode::kI32x4MaxS:
1635     case IrOpcode::kI16x8MaxS:
1636     case IrOpcode::kI8x16MaxS: {
1637       LowerIntMinMax(node, machine()->Int32LessThan(), true, rep_type);
1638       break;
1639     }
1640     case IrOpcode::kI32x4MinS:
1641     case IrOpcode::kI16x8MinS:
1642     case IrOpcode::kI8x16MinS: {
1643       LowerIntMinMax(node, machine()->Int32LessThan(), false, rep_type);
1644       break;
1645     }
1646     case IrOpcode::kI32x4MaxU:
1647     case IrOpcode::kI16x8MaxU:
1648     case IrOpcode::kI8x16MaxU: {
1649       LowerIntMinMax(node, machine()->Uint32LessThan(), true, rep_type);
1650       break;
1651     }
1652     case IrOpcode::kI32x4MinU:
1653     case IrOpcode::kI16x8MinU:
1654     case IrOpcode::kI8x16MinU: {
1655       LowerIntMinMax(node, machine()->Uint32LessThan(), false, rep_type);
1656       break;
1657     }
1658     case IrOpcode::kI32x4DotI16x8S: {
1659       // i32x4.dot_i16x8_s wants the inputs to be i16x8, but outputs to i32x4.
1660       DCHECK_EQ(2, node->InputCount());
1661       Node** rep_left =
1662           GetReplacementsWithType(node->InputAt(0), SimdType::kInt16x8);
1663       Node** rep_right =
1664           GetReplacementsWithType(node->InputAt(1), SimdType::kInt16x8);
1665       int num_lanes = NumLanes(rep_type);
1666       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1667       for (int i = 0; i < num_lanes; ++i) {
1668         Node* lo = graph()->NewNode(machine()->Int32Mul(), rep_left[i * 2],
1669                                     rep_right[i * 2]);
1670         Node* hi = graph()->NewNode(machine()->Int32Mul(), rep_left[i * 2 + 1],
1671                                     rep_right[i * 2 + 1]);
1672         rep_node[i] = graph()->NewNode(machine()->Int32Add(), lo, hi);
1673       }
1674       ReplaceNode(node, rep_node, num_lanes);
1675       break;
1676     }
1677     case IrOpcode::kI64x2Neg: {
1678       DCHECK_EQ(1, node->InputCount());
1679       Node** rep = GetReplacementsWithType(node->InputAt(0), rep_type);
1680       int num_lanes = NumLanes(rep_type);
1681       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1682       Node* zero = graph()->NewNode(common()->Int64Constant(0));
1683       for (int i = 0; i < num_lanes; ++i) {
1684         rep_node[i] = graph()->NewNode(machine()->Int64Sub(), zero, rep[i]);
1685       }
1686       ReplaceNode(node, rep_node, num_lanes);
1687       break;
1688     }
1689     case IrOpcode::kI32x4Neg:
1690     case IrOpcode::kI16x8Neg:
1691     case IrOpcode::kI8x16Neg: {
1692       DCHECK_EQ(1, node->InputCount());
1693       Node** rep = GetReplacementsWithType(node->InputAt(0), rep_type);
1694       int num_lanes = NumLanes(rep_type);
1695       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1696       Node* zero = graph()->NewNode(common()->Int32Constant(0));
1697       for (int i = 0; i < num_lanes; ++i) {
1698         rep_node[i] = graph()->NewNode(machine()->Int32Sub(), zero, rep[i]);
1699         if (node->opcode() == IrOpcode::kI16x8Neg) {
1700           rep_node[i] = FixUpperBits(rep_node[i], kShift16);
1701         } else if (node->opcode() == IrOpcode::kI8x16Neg) {
1702           rep_node[i] = FixUpperBits(rep_node[i], kShift8);
1703         }
1704       }
1705       ReplaceNode(node, rep_node, num_lanes);
1706       break;
1707     }
1708     case IrOpcode::kI32x4Abs:
1709     case IrOpcode::kI16x8Abs:
1710     case IrOpcode::kI8x16Abs: {
1711       // Branch-free abs, from https://stackoverflow.com/a/14194764:
1712       // abs(x) = (x XOR y) - y, where y = x >> 31 (arithmetic shift).
1713       Node** rep = GetReplacementsWithType(node->InputAt(0), rep_type);
1714       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1715       for (int i = 0; i < num_lanes; ++i) {
1716         // y = x >> 31 on the 32-bit node (fine for narrow lanes too); narrow
1717         // results then get the same FixUpperBits fix-up as the Neg lowering.
1718         Node* y = graph()->NewNode(machine()->Word32Sar(), rep[i],
1719                                    mcgraph_->Int32Constant(31));
1720         rep_node[i] = graph()->NewNode(
1721             machine()->Int32Sub(),
1722             graph()->NewNode(machine()->Word32Xor(), rep[i], y), y);
1723         if (node->opcode() == IrOpcode::kI16x8Abs) {
1724           rep_node[i] = FixUpperBits(rep_node[i], kShift16);
1725         } else if (node->opcode() == IrOpcode::kI8x16Abs) {
1726           rep_node[i] = FixUpperBits(rep_node[i], kShift8);
1727         }
1728       }
1729       ReplaceNode(node, rep_node, num_lanes);
1730       break;
1731     }
1732     case IrOpcode::kS128Zero: {
1733       DCHECK_EQ(0, node->InputCount());
1734       Node* rep_node[kNumLanes32];
1735       for (int i = 0; i < kNumLanes32; ++i) {
1736         rep_node[i] = mcgraph_->Int32Constant(0);
1737       }
1738       ReplaceNode(node, rep_node, kNumLanes32);
1739       break;
1740     }
1741     case IrOpcode::kS128Not: {
1742       DCHECK_EQ(1, node->InputCount());
1743       Node** rep = GetReplacementsWithType(node->InputAt(0), rep_type);
1744       Node* rep_node[kNumLanes32];
1745       Node* mask = graph()->NewNode(common()->Int32Constant(0xFFFFFFFF));
1746       for (int i = 0; i < kNumLanes32; ++i) {
1747         rep_node[i] = graph()->NewNode(machine()->Word32Xor(), rep[i], mask);
1748       }
1749       ReplaceNode(node, rep_node, kNumLanes32);
1750       break;
1751     }
1752     case IrOpcode::kS128AndNot: {
1753       DCHECK_EQ(2, node->InputCount());
1754       Node** rep_left = GetReplacementsWithType(node->InputAt(0), rep_type);
1755       Node** rep_right = GetReplacementsWithType(node->InputAt(1), rep_type);
1756       int num_lanes = NumLanes(rep_type);
1757       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1758       Node* mask = graph()->NewNode(common()->Int32Constant(0xFFFFFFFF));
1759       for (int i = 0; i < num_lanes; ++i) {
1760         Node* not_rep_right =
1761             graph()->NewNode(machine()->Word32Xor(), rep_right[i], mask);
1762         rep_node[i] = graph()->NewNode(machine()->Word32And(), rep_left[i],
1763                                        not_rep_right);
1764       }
1765       ReplaceNode(node, rep_node, num_lanes);
1766       break;
1767     }
1768     case IrOpcode::kI32x4SConvertF32x4: {
1769       LowerConvertFromFloat(node, true);
1770       break;
1771     }
1772     case IrOpcode::kI32x4UConvertF32x4: {
1773       LowerConvertFromFloat(node, false);
1774       break;
1775     }
1776     case IrOpcode::kI32x4SConvertI16x8Low: {
1777       LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true,
1778                           0);
1779       break;
1780     }
1781     case IrOpcode::kI32x4SConvertI16x8High: {
1782       LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, true,
1783                           4);
1784       break;
1785     }
1786     case IrOpcode::kI32x4UConvertI16x8Low: {
1787       LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false,
1788                           0);
1789       break;
1790     }
1791     case IrOpcode::kI32x4UConvertI16x8High: {
1792       LowerConvertFromInt(node, SimdType::kInt16x8, SimdType::kInt32x4, false,
1793                           4);
1794       break;
1795     }
1796     case IrOpcode::kI16x8SConvertI8x16Low: {
1797       LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true,
1798                           0);
1799       break;
1800     }
1801     case IrOpcode::kI16x8SConvertI8x16High: {
1802       LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, true,
1803                           8);
1804       break;
1805     }
1806     case IrOpcode::kI16x8UConvertI8x16Low: {
1807       LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false,
1808                           0);
1809       break;
1810     }
1811     case IrOpcode::kI16x8UConvertI8x16High: {
1812       LowerConvertFromInt(node, SimdType::kInt8x16, SimdType::kInt16x8, false,
1813                           8);
1814       break;
1815     }
1816     case IrOpcode::kI16x8SConvertI32x4: {
1817       LowerPack(node, SimdType::kInt32x4, SimdType::kInt16x8, true);
1818       break;
1819     }
1820     case IrOpcode::kI16x8UConvertI32x4: {
1821       LowerPack(node, SimdType::kInt32x4, SimdType::kInt16x8, false);
1822       break;
1823     }
1824     case IrOpcode::kI8x16SConvertI16x8: {
1825       LowerPack(node, SimdType::kInt16x8, SimdType::kInt8x16, true);
1826       break;
1827     }
1828     case IrOpcode::kI8x16UConvertI16x8: {
1829       LowerPack(node, SimdType::kInt16x8, SimdType::kInt8x16, false);
1830       break;
1831     }
1832     case IrOpcode::kI64x2Shl:
1833     case IrOpcode::kI32x4Shl:
1834     case IrOpcode::kI16x8Shl:
1835     case IrOpcode::kI8x16Shl:
1836     case IrOpcode::kI64x2ShrS:
1837     case IrOpcode::kI32x4ShrS:
1838     case IrOpcode::kI16x8ShrS:
1839     case IrOpcode::kI8x16ShrS:
1840     case IrOpcode::kI64x2ShrU:
1841     case IrOpcode::kI32x4ShrU:
1842     case IrOpcode::kI16x8ShrU:
1843     case IrOpcode::kI8x16ShrU: {
1844       LowerShiftOp(node, rep_type);
1845       break;
1846     }
1847     case IrOpcode::kF32x4AddHoriz: {
1848       LowerBinaryOp(node, rep_type, machine()->Float32Add(), false);
1849       break;
1850     }
1851 #define F32X4_BINOP_CASE(name)                                 \
1852   case IrOpcode::kF32x4##name: {                               \
1853     LowerBinaryOp(node, rep_type, machine()->Float32##name()); \
1854     break;                                                     \
1855   }
1856       F32X4_BINOP_CASE(Add)
1857       F32X4_BINOP_CASE(Sub)
1858       F32X4_BINOP_CASE(Mul)
1859       F32X4_BINOP_CASE(Div)
1860       F32X4_BINOP_CASE(Min)
1861       F32X4_BINOP_CASE(Max)
1862     case IrOpcode::kF32x4Pmin: {
1863       LowerFloatPseudoMinMax(node, machine()->Float32LessThan(), false,
1864                              rep_type);
1865       break;
1866     }
1867     case IrOpcode::kF32x4Pmax: {
1868       LowerFloatPseudoMinMax(node, machine()->Float32LessThan(), true,
1869                              rep_type);
1870       break;
1871     }
1872 #undef F32X4_BINOP_CASE
1873 #define F32X4_UNOP_CASE(name)                                 \
1874   case IrOpcode::kF32x4##name: {                              \
1875     LowerUnaryOp(node, rep_type, machine()->Float32##name()); \
1876     break;                                                    \
1877   }
1878       F32X4_UNOP_CASE(Abs)
1879       F32X4_UNOP_CASE(Neg)
1880       F32X4_UNOP_CASE(Sqrt)
1881 #undef F32X4_UNOP_CASE
1882     case IrOpcode::kF32x4Ceil: {
1883       LowerUnaryOp(node, rep_type, machine()->Float32RoundUp().op());
1884       break;
1885     }
1886     case IrOpcode::kF32x4Floor: {
1887       LowerUnaryOp(node, rep_type, machine()->Float32RoundDown().op());
1888       break;
1889     }
1890     case IrOpcode::kF32x4Trunc: {
1891       LowerUnaryOp(node, rep_type, machine()->Float32RoundTruncate().op());
1892       break;
1893     }
1894     case IrOpcode::kF32x4NearestInt: {
1895       LowerUnaryOp(node, rep_type, machine()->Float32RoundTiesEven().op());
1896       break;
1897     }
1898     case IrOpcode::kF32x4RecipApprox:
1899     case IrOpcode::kF32x4RecipSqrtApprox: {
1900       DCHECK_EQ(1, node->InputCount());
1901       Node** rep = GetReplacementsWithType(node->InputAt(0), rep_type);
1902       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1903       Node* float_one = graph()->NewNode(common()->Float32Constant(1.0));
1904       for (int i = 0; i < num_lanes; ++i) {
1905         Node* tmp = rep[i];
1906         if (node->opcode() == IrOpcode::kF32x4RecipSqrtApprox) {
1907           tmp = graph()->NewNode(machine()->Float32Sqrt(), rep[i]);
1908         }
1909         rep_node[i] = graph()->NewNode(machine()->Float32Div(), float_one, tmp);
1910       }
1911       ReplaceNode(node, rep_node, num_lanes);
1912       break;
1913     }
1914     case IrOpcode::kF32x4SConvertI32x4: {
1915       LowerUnaryOp(node, SimdType::kInt32x4, machine()->RoundInt32ToFloat32());
1916       break;
1917     }
1918     case IrOpcode::kF32x4UConvertI32x4: {
1919       LowerUnaryOp(node, SimdType::kInt32x4, machine()->RoundUint32ToFloat32());
1920       break;
1921     }
1922     case IrOpcode::kF64x2Abs: {
1923       LowerUnaryOp(node, rep_type, machine()->Float64Abs());
1924       break;
1925     }
1926     case IrOpcode::kF64x2Neg: {
1927       LowerUnaryOp(node, rep_type, machine()->Float64Neg());
1928       break;
1929     }
1930     case IrOpcode::kF64x2Sqrt: {
1931       LowerUnaryOp(node, rep_type, machine()->Float64Sqrt());
1932       break;
1933     }
1934     case IrOpcode::kF64x2Add: {
1935       LowerBinaryOp(node, rep_type, machine()->Float64Add());
1936       break;
1937     }
1938     case IrOpcode::kF64x2Sub: {
1939       LowerBinaryOp(node, rep_type, machine()->Float64Sub());
1940       break;
1941     }
1942     case IrOpcode::kF64x2Mul: {
1943       LowerBinaryOp(node, rep_type, machine()->Float64Mul());
1944       break;
1945     }
1946     case IrOpcode::kF64x2Div: {
1947       LowerBinaryOp(node, rep_type, machine()->Float64Div());
1948       break;
1949     }
1950     case IrOpcode::kF64x2Min: {
1951       LowerBinaryOp(node, rep_type, machine()->Float64Min());
1952       break;
1953     }
1954     case IrOpcode::kF64x2Max: {
1955       LowerBinaryOp(node, rep_type, machine()->Float64Max());
1956       break;
1957     }
1958     case IrOpcode::kF64x2Pmin: {
1959       LowerFloatPseudoMinMax(node, machine()->Float64LessThan(), false,
1960                              rep_type);
1961       break;
1962     }
1963     case IrOpcode::kF64x2Pmax: {
1964       LowerFloatPseudoMinMax(node, machine()->Float64LessThan(), true,
1965                              rep_type);
1966       break;
1967     }
1968     case IrOpcode::kF64x2Ceil: {
1969       LowerUnaryOp(node, rep_type, machine()->Float64RoundUp().op());
1970       break;
1971     }
1972     case IrOpcode::kF64x2Floor: {
1973       LowerUnaryOp(node, rep_type, machine()->Float64RoundDown().op());
1974       break;
1975     }
1976     case IrOpcode::kF64x2Trunc: {
1977       LowerUnaryOp(node, rep_type, machine()->Float64RoundTruncate().op());
1978       break;
1979     }
1980     case IrOpcode::kF64x2NearestInt: {
1981       LowerUnaryOp(node, rep_type, machine()->Float64RoundTiesEven().op());
1982       break;
1983     }
1984     case IrOpcode::kF64x2Splat:
1985     case IrOpcode::kF32x4Splat:
1986     case IrOpcode::kI64x2Splat:
1987     case IrOpcode::kI32x4Splat:
1988     case IrOpcode::kI16x8Splat:
1989     case IrOpcode::kI8x16Splat: {
1990       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
1991       Node* val = (HasReplacement(0, node->InputAt(0)))
1992                       ? GetReplacements(node->InputAt(0))[0]
1993                       : node->InputAt(0);
1994 
1995       // I16 and I8 are placed in Word32 nodes, we need to mask them
1996       // accordingly, to account for overflows, then sign extend them.
1997       if (node->opcode() == IrOpcode::kI16x8Splat) {
1998         val = graph()->NewNode(machine()->SignExtendWord16ToInt32(),
1999                                Mask(val, kMask16));
2000       } else if (node->opcode() == IrOpcode::kI8x16Splat) {
2001         val = graph()->NewNode(machine()->SignExtendWord8ToInt32(),
2002                                Mask(val, kMask8));
2003       }
2004 
2005       for (int i = 0; i < num_lanes; ++i) {
2006         rep_node[i] = val;
2007       }
2008       ReplaceNode(node, rep_node, num_lanes);
2009       break;
2010     }
2011     case IrOpcode::kF64x2ExtractLane:
2012     case IrOpcode::kF32x4ExtractLane:
2013     case IrOpcode::kI64x2ExtractLane:
2014     case IrOpcode::kI32x4ExtractLane:
2015     case IrOpcode::kI16x8ExtractLaneU:
2016     case IrOpcode::kI16x8ExtractLaneS:
2017     case IrOpcode::kI8x16ExtractLaneU:
2018     case IrOpcode::kI8x16ExtractLaneS: {
2019       int32_t lane = OpParameter<int32_t>(node->op());
2020       Node** rep_node = zone()->NewArray<Node*>(1);
2021       rep_node[0] = GetReplacementsWithType(node->InputAt(0), rep_type)[lane];
2022 
2023       // If unsigned, mask the top bits.
2024       if (node->opcode() == IrOpcode::kI16x8ExtractLaneU) {
2025         rep_node[0] = Mask(rep_node[0], kMask16);
2026       } else if (node->opcode() == IrOpcode::kI8x16ExtractLaneU) {
2027         rep_node[0] = Mask(rep_node[0], kMask8);
2028       }
2029 
2030       ReplaceNode(node, rep_node, 1);
2031       break;
2032     }
2033     case IrOpcode::kF64x2ReplaceLane:
2034     case IrOpcode::kF32x4ReplaceLane:
2035     case IrOpcode::kI64x2ReplaceLane:
2036     case IrOpcode::kI32x4ReplaceLane:
2037     case IrOpcode::kI16x8ReplaceLane:
2038     case IrOpcode::kI8x16ReplaceLane: {
2039       DCHECK_EQ(2, node->InputCount());
2040       Node* repNode = node->InputAt(1);
2041       int32_t lane = OpParameter<int32_t>(node->op());
2042       Node** old_rep_node = GetReplacementsWithType(node->InputAt(0), rep_type);
2043       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
2044       for (int i = 0; i < num_lanes; ++i) {
2045         rep_node[i] = old_rep_node[i];
2046       }
2047       if (HasReplacement(0, repNode)) {
2048         rep_node[lane] = GetReplacements(repNode)[0];
2049       } else {
2050         rep_node[lane] = repNode;
2051       }
2052 
2053       // The replacement nodes for these opcodes are in Word32, and we always
2054       // store nodes in sign extended form (and mask to account for overflows.)
2055       if (node->opcode() == IrOpcode::kI16x8ReplaceLane) {
2056         rep_node[lane] = graph()->NewNode(machine()->SignExtendWord16ToInt32(),
2057                                           Mask(rep_node[lane], kMask16));
2058       } else if (node->opcode() == IrOpcode::kI8x16ReplaceLane) {
2059         rep_node[lane] = graph()->NewNode(machine()->SignExtendWord8ToInt32(),
2060                                           Mask(rep_node[lane], kMask8));
2061       }
2062 
2063       ReplaceNode(node, rep_node, num_lanes);
2064       break;
2065     }
2066 #define COMPARISON_CASE(type, simd_op, lowering_op, invert)                    \
2067   case IrOpcode::simd_op: {                                                    \
2068     LowerCompareOp(node, SimdType::k##type, machine()->lowering_op(), invert); \
2069     break;                                                                     \
2070   }
2071       COMPARISON_CASE(Float64x2, kF64x2Eq, Float64Equal, false)
2072       COMPARISON_CASE(Float64x2, kF64x2Lt, Float64LessThan, false)
2073       COMPARISON_CASE(Float64x2, kF64x2Le, Float64LessThanOrEqual, false)
2074       COMPARISON_CASE(Float32x4, kF32x4Eq, Float32Equal, false)
2075       COMPARISON_CASE(Float32x4, kF32x4Lt, Float32LessThan, false)
2076       COMPARISON_CASE(Float32x4, kF32x4Le, Float32LessThanOrEqual, false)
2077       COMPARISON_CASE(Float32x4, kF32x4Gt, Float32LessThan, true)
2078       COMPARISON_CASE(Float32x4, kF32x4Ge, Float32LessThanOrEqual, true)
2079       COMPARISON_CASE(Int32x4, kI32x4Eq, Word32Equal, false)
2080       COMPARISON_CASE(Int32x4, kI32x4LtS, Int32LessThan, false)
2081       COMPARISON_CASE(Int32x4, kI32x4LeS, Int32LessThanOrEqual, false)
2082       COMPARISON_CASE(Int32x4, kI32x4GtS, Int32LessThan, true)
2083       COMPARISON_CASE(Int32x4, kI32x4GeS, Int32LessThanOrEqual, true)
2084       COMPARISON_CASE(Int32x4, kI32x4LtU, Uint32LessThan, false)
2085       COMPARISON_CASE(Int32x4, kI32x4LeU, Uint32LessThanOrEqual, false)
2086       COMPARISON_CASE(Int32x4, kI32x4GtU, Uint32LessThan, true)
2087       COMPARISON_CASE(Int32x4, kI32x4GeU, Uint32LessThanOrEqual, true)
2088       COMPARISON_CASE(Int16x8, kI16x8Eq, Word32Equal, false)
2089       COMPARISON_CASE(Int16x8, kI16x8LtS, Int32LessThan, false)
2090       COMPARISON_CASE(Int16x8, kI16x8LeS, Int32LessThanOrEqual, false)
2091       COMPARISON_CASE(Int16x8, kI16x8GtS, Int32LessThan, true)
2092       COMPARISON_CASE(Int16x8, kI16x8GeS, Int32LessThanOrEqual, true)
2093       COMPARISON_CASE(Int16x8, kI16x8LtU, Uint32LessThan, false)
2094       COMPARISON_CASE(Int16x8, kI16x8LeU, Uint32LessThanOrEqual, false)
2095       COMPARISON_CASE(Int16x8, kI16x8GtU, Uint32LessThan, true)
2096       COMPARISON_CASE(Int16x8, kI16x8GeU, Uint32LessThanOrEqual, true)
2097       COMPARISON_CASE(Int8x16, kI8x16Eq, Word32Equal, false)
2098       COMPARISON_CASE(Int8x16, kI8x16LtS, Int32LessThan, false)
2099       COMPARISON_CASE(Int8x16, kI8x16LeS, Int32LessThanOrEqual, false)
2100       COMPARISON_CASE(Int8x16, kI8x16GtS, Int32LessThan, true)
2101       COMPARISON_CASE(Int8x16, kI8x16GeS, Int32LessThanOrEqual, true)
2102       COMPARISON_CASE(Int8x16, kI8x16LtU, Uint32LessThan, false)
2103       COMPARISON_CASE(Int8x16, kI8x16LeU, Uint32LessThanOrEqual, false)
2104       COMPARISON_CASE(Int8x16, kI8x16GtU, Uint32LessThan, true)
2105       COMPARISON_CASE(Int8x16, kI8x16GeU, Uint32LessThanOrEqual, true)
2106 #undef COMPARISON_CASE
2107     case IrOpcode::kF64x2Ne: {
2108       LowerNotEqual(node, SimdType::kFloat64x2, machine()->Float64Equal());
2109       break;
2110     }
2111     case IrOpcode::kF32x4Ne: {
2112       LowerNotEqual(node, SimdType::kFloat32x4, machine()->Float32Equal());
2113       break;
2114     }
2115     case IrOpcode::kI32x4Ne: {
2116       LowerNotEqual(node, SimdType::kInt32x4, machine()->Word32Equal());
2117       break;
2118     }
2119     case IrOpcode::kI16x8Ne: {
2120       LowerNotEqual(node, SimdType::kInt16x8, machine()->Word32Equal());
2121       break;
2122     }
2123     case IrOpcode::kI8x16Ne: {
2124       LowerNotEqual(node, SimdType::kInt8x16, machine()->Word32Equal());
2125       break;
2126     }
2127     case IrOpcode::kS128Select: {
2128       DCHECK_EQ(3, node->InputCount());
2129       DCHECK(ReplacementType(node->InputAt(0)) == SimdType::kInt32x4 ||
2130              ReplacementType(node->InputAt(0)) == SimdType::kInt16x8 ||
2131              ReplacementType(node->InputAt(0)) == SimdType::kInt8x16);
2132       Node** boolean_input =
2133           GetReplacementsWithType(node->InputAt(0), rep_type);
2134       Node** rep_left = GetReplacementsWithType(node->InputAt(1), rep_type);
2135       Node** rep_right = GetReplacementsWithType(node->InputAt(2), rep_type);
2136       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
2137       for (int i = 0; i < num_lanes; ++i) {
2138         Node* tmp1 =
2139             graph()->NewNode(machine()->Word32Xor(), rep_left[i], rep_right[i]);
2140         Node* tmp2 =
2141             graph()->NewNode(machine()->Word32And(), boolean_input[i], tmp1);
2142         rep_node[i] =
2143             graph()->NewNode(machine()->Word32Xor(), rep_right[i], tmp2);
2144       }
2145       ReplaceNode(node, rep_node, num_lanes);
2146       break;
2147     }
2148     case IrOpcode::kI8x16Swizzle: {
2149       DCHECK_EQ(2, node->InputCount());
2150       Node** rep_left = GetReplacementsWithType(node->InputAt(0), rep_type);
2151       Node** indices = GetReplacementsWithType(node->InputAt(1), rep_type);
2152       Node** rep_nodes = zone()->NewArray<Node*>(num_lanes);
2153       Node* stack_slot = graph()->NewNode(
2154           machine()->StackSlot(MachineRepresentation::kSimd128));
2155       // Lowered via memory: spill input 0's lanes, reload at input 1's indices.
2156       // Store all num_lanes lanes into the stack slot, one byte per lane.
2157       const Operator* store_op = machine()->Store(
2158           StoreRepresentation(MachineRepresentation::kWord8, kNoWriteBarrier));
2159       Node* effect_input = graph()->start();
2160       for (int i = num_lanes - 1; i >= 0; i--) {
2161         // All stores must happen before any of the loads below, so chain
2162         // them: each store's effect input is the store emitted just before.
2163         Node* store =
2164             graph()->NewNode(store_op, stack_slot, mcgraph_->Int32Constant(i),
2165                              rep_left[i], effect_input, graph()->start());
2166         effect_input = store;
2167       }
2168 
2169       for (int i = num_lanes - 1; i >= 0; i--) {
2170         // Only select lane when index is < num_lanes, otherwise write 0 to
2171         // lane. Use Uint32 to take care of negative indices.
2172         Diamond d(graph(), common(),
2173                   graph()->NewNode(machine()->Uint32LessThan(), indices[i],
2174                                    mcgraph_->Int32Constant(num_lanes)));
2175         // Byte load at the runtime index; effect input is the last store above.
2176         Node* load =
2177             graph()->NewNode(machine()->Load(LoadRepresentation::Uint8()),
2178                              stack_slot, indices[i], effect_input, d.if_true);
2179 
2180         rep_nodes[i] = d.Phi(MachineRepresentation::kWord8, load,
2181                              mcgraph_->Int32Constant(0));
2182       }
2183 
2184       ReplaceNode(node, rep_nodes, num_lanes);
2185       break;
2186     }
2187     case IrOpcode::kI8x16Shuffle: {
2188       DCHECK_EQ(2, node->InputCount());
2189       S128ImmediateParameter shuffle = S128ImmediateParameterOf(node->op());
2190       Node** rep_left = GetReplacementsWithType(node->InputAt(0), rep_type);
2191       Node** rep_right = GetReplacementsWithType(node->InputAt(1), rep_type);
2192       Node** rep_node = zone()->NewArray<Node*>(16);
2193       for (int i = 0; i < 16; i++) {
2194         int lane = shuffle[i];
2195         rep_node[i] = lane < 16 ? rep_left[lane] : rep_right[lane - 16];
2196       }
2197       ReplaceNode(node, rep_node, 16);
2198       break;
2199     }
2200     case IrOpcode::kV32x4AnyTrue:
2201     case IrOpcode::kV16x8AnyTrue:
2202     case IrOpcode::kV8x16AnyTrue: {
2203       DCHECK_EQ(1, node->InputCount());
2204       // AnyTrue always returns a I32x4, and can work with inputs of any shape,
2205       // but we still need GetReplacementsWithType if input is float.
2206       DCHECK_EQ(ReplacementType(node), SimdType::kInt32x4);
2207       Node** reps = GetReplacementsWithType(node->InputAt(0), rep_type);
2208       Node** rep_node = zone()->NewArray<Node*>(1);
2209       Node* true_node = mcgraph_->Int32Constant(1);
2210       Node* zero = mcgraph_->Int32Constant(0);
2211       Node* tmp_result = zero;
2212       for (int i = 0; i < num_lanes; ++i) {
2213         Diamond d(graph(), common(),
2214                   graph()->NewNode(machine()->Word32Equal(), reps[i], zero));
2215         tmp_result =
2216             d.Phi(MachineRepresentation::kWord32, tmp_result, true_node);
2217       }
2218       rep_node[0] = tmp_result;
2219       ReplaceNode(node, rep_node, 1);
2220       break;
2221     }
2222     case IrOpcode::kV32x4AllTrue: {
2223       LowerAllTrueOp(node, SimdType::kInt32x4);
2224       break;
2225     }
2226     case IrOpcode::kV16x8AllTrue: {
2227       LowerAllTrueOp(node, SimdType::kInt16x8);
2228       break;
2229     }
2230     case IrOpcode::kV8x16AllTrue: {
2231       LowerAllTrueOp(node, SimdType::kInt8x16);
2232       break;
2233     }
2234     case IrOpcode::kI8x16BitMask: {
2235       LowerBitMaskOp(node, rep_type, 7);
2236       break;
2237     }
2238     case IrOpcode::kI16x8BitMask: {
2239       LowerBitMaskOp(node, rep_type, 15);
2240       break;
2241     }
2242     case IrOpcode::kI32x4BitMask: {
2243       LowerBitMaskOp(node, rep_type, 31);
2244       break;
2245     }
2246     case IrOpcode::kI8x16RoundingAverageU:
2247     case IrOpcode::kI16x8RoundingAverageU: {
2248       DCHECK_EQ(2, node->InputCount());
2249       Node** rep_left = GetReplacementsWithType(node->InputAt(0), rep_type);
2250       Node** rep_right = GetReplacementsWithType(node->InputAt(1), rep_type);
2251       int num_lanes = NumLanes(rep_type);
2252       Node** rep_node = zone()->NewArray<Node*>(num_lanes);
2253       // Nodes are stored signed, so mask away the top bits.
2254       // rounding_average(left, right) = (left + right + 1) >> 1
2255       const int bit_mask = num_lanes == 16 ? kMask8 : kMask16;
2256       for (int i = 0; i < num_lanes; ++i) {
2257         Node* mask_left = graph()->NewNode(machine()->Word32And(), rep_left[i],
2258                                            mcgraph_->Int32Constant(bit_mask));
2259         Node* mask_right =
2260             graph()->NewNode(machine()->Word32And(), rep_right[i],
2261                              mcgraph_->Int32Constant(bit_mask));
2262         Node* left_plus_right_plus_one = graph()->NewNode(
2263             machine()->Int32Add(),
2264             graph()->NewNode(machine()->Int32Add(), mask_left, mask_right),
2265             mcgraph_->Int32Constant(1));
2266         rep_node[i] =
2267             graph()->NewNode(machine()->Word32Shr(), left_plus_right_plus_one,
2268                              mcgraph_->Int32Constant(1));
2269       }
2270       ReplaceNode(node, rep_node, num_lanes);
2271       break;
2272     }
2273     default: {
2274       DefaultLowering(node);
2275     }
2276   }
2277 }
2278 
DefaultLowering(Node * node)2279 bool SimdScalarLowering::DefaultLowering(Node* node) {
2280   bool something_changed = false;
2281   for (int i = NodeProperties::PastValueIndex(node) - 1; i >= 0; i--) {
2282     Node* input = node->InputAt(i);
2283     if (HasReplacement(0, input)) {
2284       something_changed = true;
2285       node->ReplaceInput(i, GetReplacements(input)[0]);
2286     }
2287     if (ReplacementCount(input) > 1 && HasReplacement(1, input)) {
2288       something_changed = true;
2289       for (int j = 1; j < ReplacementCount(input); ++j) {
2290         node->InsertInput(zone(), i + j, GetReplacements(input)[j]);
2291       }
2292     }
2293   }
2294   return something_changed;
2295 }
2296 
ReplaceNode(Node * old,Node ** new_nodes,int count)2297 void SimdScalarLowering::ReplaceNode(Node* old, Node** new_nodes, int count) {
2298   replacements_[old->id()].node = zone()->NewArray<Node*>(count);
2299   for (int i = 0; i < count; ++i) {
2300     replacements_[old->id()].node[i] = new_nodes[i];
2301   }
2302   replacements_[old->id()].num_replacements = count;
2303 }
2304 
HasReplacement(size_t index,Node * node)2305 bool SimdScalarLowering::HasReplacement(size_t index, Node* node) {
2306   return replacements_[node->id()].node != nullptr &&
2307          replacements_[node->id()].node[index] != nullptr;
2308 }
2309 
ReplacementType(Node * node)2310 SimdScalarLowering::SimdType SimdScalarLowering::ReplacementType(Node* node) {
2311   return replacements_[node->id()].type;
2312 }
2313 
GetReplacements(Node * node)2314 Node** SimdScalarLowering::GetReplacements(Node* node) {
2315   Node** result = replacements_[node->id()].node;
2316   DCHECK(result);
2317   return result;
2318 }
2319 
ReplacementCount(Node * node)2320 int SimdScalarLowering::ReplacementCount(Node* node) {
2321   return replacements_[node->id()].num_replacements;
2322 }
2323 
Int32ToFloat32(Node ** replacements,Node ** result)2324 void SimdScalarLowering::Int32ToFloat32(Node** replacements, Node** result) {
2325   for (int i = 0; i < kNumLanes32; ++i) {
2326     if (replacements[i] != nullptr) {
2327       result[i] =
2328           graph()->NewNode(machine()->BitcastInt32ToFloat32(), replacements[i]);
2329     } else {
2330       result[i] = nullptr;
2331     }
2332   }
2333 }
2334 
Int64ToFloat64(Node ** replacements,Node ** result)2335 void SimdScalarLowering::Int64ToFloat64(Node** replacements, Node** result) {
2336   for (int i = 0; i < kNumLanes64; ++i) {
2337     if (replacements[i] != nullptr) {
2338       result[i] =
2339           graph()->NewNode(machine()->BitcastInt64ToFloat64(), replacements[i]);
2340     } else {
2341       result[i] = nullptr;
2342     }
2343   }
2344 }
2345 
Float64ToInt64(Node ** replacements,Node ** result)2346 void SimdScalarLowering::Float64ToInt64(Node** replacements, Node** result) {
2347   for (int i = 0; i < kNumLanes64; ++i) {
2348     if (replacements[i] != nullptr) {
2349       result[i] =
2350           graph()->NewNode(machine()->BitcastFloat64ToInt64(), replacements[i]);
2351     } else {
2352       result[i] = nullptr;
2353     }
2354   }
2355 }
2356 
Float32ToInt32(Node ** replacements,Node ** result)2357 void SimdScalarLowering::Float32ToInt32(Node** replacements, Node** result) {
2358   for (int i = 0; i < kNumLanes32; ++i) {
2359     if (replacements[i] != nullptr) {
2360       result[i] =
2361           graph()->NewNode(machine()->BitcastFloat32ToInt32(), replacements[i]);
2362     } else {
2363       result[i] = nullptr;
2364     }
2365   }
2366 }
2367 
Int64ToInt32(Node ** replacements,Node ** result)2368 void SimdScalarLowering::Int64ToInt32(Node** replacements, Node** result) {
2369   const int num_ints = sizeof(int64_t) / sizeof(int32_t);
2370   const int bit_size = sizeof(int32_t) * 8;
2371   const Operator* truncate = machine()->TruncateInt64ToInt32();
2372 
2373   for (int i = 0; i < kNumLanes64; i++) {
2374     if (replacements[i] != nullptr) {
2375       for (int j = 0; j < num_ints; j++) {
2376         result[num_ints * i + j] = graph()->NewNode(
2377             truncate, graph()->NewNode(machine()->Word64Sar(), replacements[i],
2378                                        mcgraph_->Int32Constant(j * bit_size)));
2379       }
2380     } else {
2381       for (int j = 0; j < num_ints; j++) {
2382         result[num_ints * i + j] = nullptr;
2383       }
2384     }
2385   }
2386 }
2387 
2388 template <typename T>
Int32ToSmallerInt(Node ** replacements,Node ** result)2389 void SimdScalarLowering::Int32ToSmallerInt(Node** replacements, Node** result) {
2390   const int num_ints = sizeof(int32_t) / sizeof(T);
2391   const int bit_size = sizeof(T) * 8;
2392   const Operator* sign_extend;
2393   switch (sizeof(T)) {
2394     case 1:
2395       sign_extend = machine()->SignExtendWord8ToInt32();
2396       break;
2397     case 2:
2398       sign_extend = machine()->SignExtendWord16ToInt32();
2399       break;
2400     default:
2401       UNREACHABLE();
2402   }
2403 
2404   for (int i = 0; i < kNumLanes32; i++) {
2405     if (replacements[i] != nullptr) {
2406       for (int j = 0; j < num_ints; j++) {
2407         result[num_ints * i + j] = graph()->NewNode(
2408             sign_extend,
2409             graph()->NewNode(machine()->Word32Shr(), replacements[i],
2410                              mcgraph_->Int32Constant(j * bit_size)));
2411       }
2412     } else {
2413       for (int j = 0; j < num_ints; j++) {
2414         result[num_ints * i + j] = nullptr;
2415       }
2416     }
2417   }
2418 }
2419 
2420 template <typename T>
SmallerIntToInt32(Node ** replacements,Node ** result)2421 void SimdScalarLowering::SmallerIntToInt32(Node** replacements, Node** result) {
2422   const int num_ints = sizeof(int32_t) / sizeof(T);
2423   const int bit_size = sizeof(T) * 8;
2424   const int bit_mask = (1 << bit_size) - 1;
2425 
2426   for (int i = 0; i < kNumLanes32; ++i) {
2427     result[i] = mcgraph_->Int32Constant(0);
2428     for (int j = 0; j < num_ints; j++) {
2429       if (replacements[num_ints * i + j] != nullptr) {
2430         Node* clean_bits = graph()->NewNode(machine()->Word32And(),
2431                                             replacements[num_ints * i + j],
2432                                             mcgraph_->Int32Constant(bit_mask));
2433         Node* shift = graph()->NewNode(machine()->Word32Shl(), clean_bits,
2434                                        mcgraph_->Int32Constant(j * bit_size));
2435         result[i] = graph()->NewNode(machine()->Word32Or(), result[i], shift);
2436       }
2437     }
2438   }
2439 }
2440 
Int32ToInt64(Node ** replacements,Node ** result)2441 void SimdScalarLowering::Int32ToInt64(Node** replacements, Node** result) {
2442   const int num_ints = sizeof(int64_t) / sizeof(int32_t);
2443 
2444   for (int i = 0; i < kNumLanes64; i++) {
2445     Node* i64 = graph()->NewNode(machine()->ChangeUint32ToUint64(),
2446                                  replacements[num_ints * i + 1]);
2447     Node* high = graph()->NewNode(machine()->Word64Shl(), i64,
2448                                   mcgraph_->Int32Constant(32));
2449     Node* i64_low = graph()->NewNode(machine()->ChangeUint32ToUint64(),
2450                                      replacements[num_ints * i]);
2451     result[i] = graph()->NewNode(machine()->Word64Or(), high, i64_low);
2452   }
2453 }
2454 
GetReplacementsWithType(Node * node,SimdType type)2455 Node** SimdScalarLowering::GetReplacementsWithType(Node* node, SimdType type) {
2456   // Operations like extract lane, bitmask, any_true, all_true replaces a SIMD
2457   // node with a scalar. Those won't be correctly handled here. They should be
2458   // special cased and replaced with the appropriate scalar.
2459   DCHECK_LT(1, ReplacementCount(node));
2460 
2461   Node** replacements = GetReplacements(node);
2462   if (type == ReplacementType(node)) {
2463     return replacements;
2464   }
2465 
2466   int num_lanes = NumLanes(type);
2467   Node** result = zone()->NewArray<Node*>(num_lanes);
2468 
2469   switch (type) {
2470     case SimdType::kInt64x2: {
2471       switch (ReplacementType(node)) {
2472         case SimdType::kInt64x2: {
2473           UNREACHABLE();
2474         }
2475         case SimdType::kInt32x4: {
2476           Int32ToInt64(replacements, result);
2477           break;
2478         }
2479         case SimdType::kInt16x8: {
2480           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2481           SmallerIntToInt32<int16_t>(replacements, to_int32);
2482           Int32ToInt64(to_int32, result);
2483           break;
2484         }
2485         case SimdType::kInt8x16: {
2486           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2487           SmallerIntToInt32<int8_t>(replacements, to_int32);
2488           Int32ToInt64(to_int32, result);
2489           break;
2490         }
2491         case SimdType::kFloat64x2: {
2492           Float64ToInt64(replacements, result);
2493           break;
2494         }
2495         case SimdType::kFloat32x4: {
2496           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2497           Float32ToInt32(replacements, to_int32);
2498           Int32ToInt64(to_int32, result);
2499           break;
2500         }
2501       }
2502       break;
2503     }
2504     case SimdType::kInt32x4: {
2505       switch (ReplacementType(node)) {
2506         case SimdType::kInt64x2: {
2507           Int64ToInt32(replacements, result);
2508           break;
2509         }
2510         case SimdType::kInt32x4: {
2511           UNREACHABLE();
2512         }
2513         case SimdType::kInt16x8: {
2514           SmallerIntToInt32<int16_t>(replacements, result);
2515           break;
2516         }
2517         case SimdType::kInt8x16: {
2518           SmallerIntToInt32<int8_t>(replacements, result);
2519           break;
2520         }
2521         case SimdType::kFloat64x2: {
2522           Node** float64_to_int64 = zone()->NewArray<Node*>(kNumLanes64);
2523           Float64ToInt64(replacements, float64_to_int64);
2524           Int64ToInt32(float64_to_int64, result);
2525           break;
2526         }
2527         case SimdType::kFloat32x4: {
2528           Float32ToInt32(replacements, result);
2529           break;
2530         }
2531       }
2532       break;
2533     }
2534     case SimdType::kInt16x8: {
2535       switch (ReplacementType(node)) {
2536         case SimdType::kInt64x2: {
2537           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2538           Int64ToInt32(replacements, to_int32);
2539           Int32ToSmallerInt<int16_t>(to_int32, result);
2540           break;
2541         }
2542         case SimdType::kInt32x4: {
2543           Int32ToSmallerInt<int16_t>(replacements, result);
2544           break;
2545         }
2546         case SimdType::kInt16x8: {
2547           UNREACHABLE();
2548         }
2549         case SimdType::kInt8x16: {
2550           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2551           SmallerIntToInt32<int8_t>(replacements, to_int32);
2552           Int32ToSmallerInt<int16_t>(to_int32, result);
2553           break;
2554         }
2555         case SimdType::kFloat64x2: {
2556           Node** to_int64 = zone()->NewArray<Node*>(kNumLanes64);
2557           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2558           Float64ToInt64(replacements, to_int64);
2559           Int64ToInt32(to_int64, to_int32);
2560           Int32ToSmallerInt<int16_t>(to_int32, result);
2561           break;
2562         }
2563         case SimdType::kFloat32x4: {
2564           Node** float32_to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2565           Float32ToInt32(replacements, float32_to_int32);
2566           Int32ToSmallerInt<int16_t>(float32_to_int32, result);
2567           break;
2568         }
2569       }
2570       break;
2571     }
2572     case SimdType::kInt8x16: {
2573       switch (ReplacementType(node)) {
2574         case SimdType::kInt64x2: {
2575           Node** int64_to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2576           Int64ToInt32(replacements, int64_to_int32);
2577           Int32ToSmallerInt<int8_t>(int64_to_int32, result);
2578           break;
2579         }
2580         case SimdType::kInt32x4: {
2581           Int32ToSmallerInt<int8_t>(replacements, result);
2582           break;
2583         }
2584         case SimdType::kInt16x8: {
2585           Node** int16_to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2586           SmallerIntToInt32<int16_t>(replacements, int16_to_int32);
2587           Int32ToSmallerInt<int8_t>(int16_to_int32, result);
2588           break;
2589         }
2590         case SimdType::kInt8x16: {
2591           UNREACHABLE();
2592         }
2593         case SimdType::kFloat64x2: {
2594           Node** to_int64 = zone()->NewArray<Node*>(kNumLanes64);
2595           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2596           Float64ToInt64(replacements, to_int64);
2597           Int64ToInt32(to_int64, to_int32);
2598           Int32ToSmallerInt<int8_t>(to_int32, result);
2599           break;
2600         }
2601         case SimdType::kFloat32x4: {
2602           Node** float32_to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2603           Float32ToInt32(replacements, float32_to_int32);
2604           Int32ToSmallerInt<int8_t>(float32_to_int32, result);
2605           break;
2606         }
2607       }
2608       break;
2609     }
2610     case SimdType::kFloat64x2: {
2611       switch (ReplacementType(node)) {
2612         case SimdType::kInt64x2: {
2613           Int64ToFloat64(replacements, result);
2614           break;
2615         }
2616         case SimdType::kInt32x4: {
2617           Node** int32_to_int64 = zone()->NewArray<Node*>(kNumLanes64);
2618           Int32ToInt64(replacements, int32_to_int64);
2619           Int64ToFloat64(int32_to_int64, result);
2620           break;
2621         }
2622         case SimdType::kInt16x8: {
2623           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2624           Node** to_int64 = zone()->NewArray<Node*>(kNumLanes64);
2625           SmallerIntToInt32<int16_t>(replacements, to_int32);
2626           Int32ToInt64(to_int32, to_int64);
2627           Int64ToFloat64(to_int64, result);
2628           break;
2629         }
2630         case SimdType::kInt8x16: {
2631           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2632           Node** to_int64 = zone()->NewArray<Node*>(kNumLanes64);
2633           SmallerIntToInt32<int8_t>(replacements, to_int32);
2634           Int32ToInt64(to_int32, to_int64);
2635           Int64ToFloat64(to_int64, result);
2636           break;
2637         }
2638         case SimdType::kFloat64x2: {
2639           UNREACHABLE();
2640         }
2641         case SimdType::kFloat32x4: {
2642           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2643           Node** to_int64 = zone()->NewArray<Node*>(kNumLanes64);
2644           Float32ToInt32(replacements, to_int32);
2645           Int32ToInt64(to_int32, to_int64);
2646           Int64ToFloat64(to_int64, result);
2647           break;
2648         }
2649       }
2650       break;
2651     }
2652     case SimdType::kFloat32x4: {
2653       switch (ReplacementType(node)) {
2654         case SimdType::kInt64x2: {
2655           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2656           Int64ToInt32(replacements, to_int32);
2657           Int32ToFloat32(to_int32, result);
2658           break;
2659         }
2660         case SimdType::kInt32x4: {
2661           Int32ToFloat32(replacements, result);
2662           break;
2663         }
2664         case SimdType::kInt16x8: {
2665           Node** to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2666           SmallerIntToInt32<int16_t>(replacements, to_int32);
2667           Int32ToFloat32(to_int32, result);
2668           break;
2669         }
2670         case SimdType::kInt8x16: {
2671           SmallerIntToInt32<int8_t>(replacements, result);
2672           Int32ToFloat32(result, result);
2673           break;
2674         }
2675         case SimdType::kFloat64x2: {
2676           Node** float64_to_int64 = zone()->NewArray<Node*>(kNumLanes64);
2677           Node** int64_to_int32 = zone()->NewArray<Node*>(kNumLanes32);
2678           Float64ToInt64(replacements, float64_to_int64);
2679           Int64ToInt32(float64_to_int64, int64_to_int32);
2680           Int32ToFloat32(int64_to_int32, result);
2681           break;
2682         }
2683         case SimdType::kFloat32x4: {
2684           UNREACHABLE();
2685         }
2686       }
2687       break;
2688     }
2689   }
2690   return result;
2691 }
2692 
PreparePhiReplacement(Node * phi)2693 void SimdScalarLowering::PreparePhiReplacement(Node* phi) {
2694   MachineRepresentation rep = PhiRepresentationOf(phi->op());
2695   if (rep == MachineRepresentation::kSimd128) {
2696     // We have to create the replacements for a phi node before we actually
2697     // lower the phi to break potential cycles in the graph. The replacements of
2698     // input nodes do not exist yet, so we use a placeholder node to pass the
2699     // graph verifier.
2700     int value_count = phi->op()->ValueInputCount();
2701     SimdType type = ReplacementType(phi);
2702     int num_lanes = NumLanes(type);
2703     Node*** inputs_rep = zone()->NewArray<Node**>(num_lanes);
2704     for (int i = 0; i < num_lanes; ++i) {
2705       inputs_rep[i] = zone()->NewArray<Node*>(value_count + 1);
2706       inputs_rep[i][value_count] = NodeProperties::GetControlInput(phi, 0);
2707     }
2708     for (int i = 0; i < value_count; ++i) {
2709       for (int j = 0; j < num_lanes; ++j) {
2710         inputs_rep[j][i] = placeholder_;
2711       }
2712     }
2713     Node** rep_nodes = zone()->NewArray<Node*>(num_lanes);
2714     for (int i = 0; i < num_lanes; ++i) {
2715       rep_nodes[i] = graph()->NewNode(
2716           common()->Phi(MachineTypeFrom(type).representation(), value_count),
2717           value_count + 1, inputs_rep[i], false);
2718     }
2719     ReplaceNode(phi, rep_nodes, num_lanes);
2720   }
2721 }
2722 }  // namespace compiler
2723 }  // namespace internal
2724 }  // namespace v8
2725