• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <assert.h>  // For assert
6 #include <limits.h>  // For LONG_MIN, LONG_MAX.
7 
8 #if V8_TARGET_ARCH_S390
9 
10 #include "src/base/bits.h"
11 #include "src/base/division-by-constant.h"
12 #include "src/codegen/callable.h"
13 #include "src/codegen/code-factory.h"
14 #include "src/codegen/external-reference-table.h"
15 #include "src/codegen/interface-descriptors-inl.h"
16 #include "src/codegen/macro-assembler.h"
17 #include "src/codegen/register-configuration.h"
18 #include "src/debug/debug.h"
19 #include "src/deoptimizer/deoptimizer.h"
20 #include "src/execution/frames-inl.h"
21 #include "src/heap/memory-chunk.h"
22 #include "src/init/bootstrapper.h"
23 #include "src/logging/counters.h"
24 #include "src/objects/smi.h"
25 #include "src/runtime/runtime.h"
26 #include "src/snapshot/snapshot.h"
27 
28 #if V8_ENABLE_WEBASSEMBLY
29 #include "src/wasm/wasm-code-manager.h"
30 #endif  // V8_ENABLE_WEBASSEMBLY
31 
32 // Satisfy cpplint check, but don't include platform-specific header. It is
33 // included recursively via macro-assembler.h.
34 #if 0
35 #include "src/codegen/s390/macro-assembler-s390.h"
36 #endif
37 
38 namespace v8 {
39 namespace internal {
40 
41 namespace {
42 
43 // For WebAssembly we care about the full floating point (Simd) registers. If we
44 // are not running Wasm, we can get away with saving half of those (F64)
45 // registers.
46 #if V8_ENABLE_WEBASSEMBLY
47 constexpr int kStackSavedSavedFPSizeInBytes =
48     kNumCallerSavedDoubles * kSimd128Size;
49 #else
50 constexpr int kStackSavedSavedFPSizeInBytes =
51     kNumCallerSavedDoubles * kDoubleSize;
52 #endif  // V8_ENABLE_WEBASSEMBLY
53 
54 }  // namespace
55 
DoubleMax(DoubleRegister result_reg,DoubleRegister left_reg,DoubleRegister right_reg)56 void TurboAssembler::DoubleMax(DoubleRegister result_reg,
57                                DoubleRegister left_reg,
58                                DoubleRegister right_reg) {
59   if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)) {
60     vfmax(result_reg, left_reg, right_reg, Condition(1), Condition(8),
61           Condition(3));
62     return;
63   }
64 
65   Label check_zero, return_left, return_right, return_nan, done;
66   cdbr(left_reg, right_reg);
67   bunordered(&return_nan, Label::kNear);
68   beq(&check_zero);
69   bge(&return_left, Label::kNear);
70   b(&return_right, Label::kNear);
71 
72   bind(&check_zero);
73   lzdr(kDoubleRegZero);
74   cdbr(left_reg, kDoubleRegZero);
75   /* left == right != 0. */
76   bne(&return_left, Label::kNear);
77   /* At this point, both left and right are either 0 or -0. */
78   /* N.B. The following works because +0 + -0 == +0 */
79   /* For max we want logical-and of sign bit: (L + R) */
80   ldr(result_reg, left_reg);
81   adbr(result_reg, right_reg);
82   b(&done, Label::kNear);
83 
84   bind(&return_nan);
85   /* If left or right are NaN, adbr propagates the appropriate one.*/
86   adbr(left_reg, right_reg);
87   b(&return_left, Label::kNear);
88 
89   bind(&return_right);
90   if (right_reg != result_reg) {
91     ldr(result_reg, right_reg);
92   }
93   b(&done, Label::kNear);
94 
95   bind(&return_left);
96   if (left_reg != result_reg) {
97     ldr(result_reg, left_reg);
98   }
99   bind(&done);
100 }
101 
DoubleMin(DoubleRegister result_reg,DoubleRegister left_reg,DoubleRegister right_reg)102 void TurboAssembler::DoubleMin(DoubleRegister result_reg,
103                                DoubleRegister left_reg,
104                                DoubleRegister right_reg) {
105   if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)) {
106     vfmin(result_reg, left_reg, right_reg, Condition(1), Condition(8),
107           Condition(3));
108     return;
109   }
110   Label check_zero, return_left, return_right, return_nan, done;
111   cdbr(left_reg, right_reg);
112   bunordered(&return_nan, Label::kNear);
113   beq(&check_zero);
114   ble(&return_left, Label::kNear);
115   b(&return_right, Label::kNear);
116 
117   bind(&check_zero);
118   lzdr(kDoubleRegZero);
119   cdbr(left_reg, kDoubleRegZero);
120   /* left == right != 0. */
121   bne(&return_left, Label::kNear);
122   /* At this point, both left and right are either 0 or -0. */
123   /* N.B. The following works because +0 + -0 == +0 */
124   /* For min we want logical-or of sign bit: -(-L + -R) */
125   lcdbr(left_reg, left_reg);
126   ldr(result_reg, left_reg);
127   if (left_reg == right_reg) {
128     adbr(result_reg, right_reg);
129   } else {
130     sdbr(result_reg, right_reg);
131   }
132   lcdbr(result_reg, result_reg);
133   b(&done, Label::kNear);
134 
135   bind(&return_nan);
136   /* If left or right are NaN, adbr propagates the appropriate one.*/
137   adbr(left_reg, right_reg);
138   b(&return_left, Label::kNear);
139 
140   bind(&return_right);
141   if (right_reg != result_reg) {
142     ldr(result_reg, right_reg);
143   }
144   b(&done, Label::kNear);
145 
146   bind(&return_left);
147   if (left_reg != result_reg) {
148     ldr(result_reg, left_reg);
149   }
150   bind(&done);
151 }
152 
FloatMax(DoubleRegister result_reg,DoubleRegister left_reg,DoubleRegister right_reg)153 void TurboAssembler::FloatMax(DoubleRegister result_reg,
154                               DoubleRegister left_reg,
155                               DoubleRegister right_reg) {
156   if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)) {
157     vfmax(result_reg, left_reg, right_reg, Condition(1), Condition(8),
158           Condition(2));
159     return;
160   }
161   Label check_zero, return_left, return_right, return_nan, done;
162   cebr(left_reg, right_reg);
163   bunordered(&return_nan, Label::kNear);
164   beq(&check_zero);
165   bge(&return_left, Label::kNear);
166   b(&return_right, Label::kNear);
167 
168   bind(&check_zero);
169   lzdr(kDoubleRegZero);
170   cebr(left_reg, kDoubleRegZero);
171   /* left == right != 0. */
172   bne(&return_left, Label::kNear);
173   /* At this point, both left and right are either 0 or -0. */
174   /* N.B. The following works because +0 + -0 == +0 */
175   /* For max we want logical-and of sign bit: (L + R) */
176   ldr(result_reg, left_reg);
177   aebr(result_reg, right_reg);
178   b(&done, Label::kNear);
179 
180   bind(&return_nan);
181   /* If left or right are NaN, aebr propagates the appropriate one.*/
182   aebr(left_reg, right_reg);
183   b(&return_left, Label::kNear);
184 
185   bind(&return_right);
186   if (right_reg != result_reg) {
187     ldr(result_reg, right_reg);
188   }
189   b(&done, Label::kNear);
190 
191   bind(&return_left);
192   if (left_reg != result_reg) {
193     ldr(result_reg, left_reg);
194   }
195   bind(&done);
196 }
197 
FloatMin(DoubleRegister result_reg,DoubleRegister left_reg,DoubleRegister right_reg)198 void TurboAssembler::FloatMin(DoubleRegister result_reg,
199                               DoubleRegister left_reg,
200                               DoubleRegister right_reg) {
201   if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)) {
202     vfmin(result_reg, left_reg, right_reg, Condition(1), Condition(8),
203           Condition(2));
204     return;
205   }
206 
207   Label check_zero, return_left, return_right, return_nan, done;
208   cebr(left_reg, right_reg);
209   bunordered(&return_nan, Label::kNear);
210   beq(&check_zero);
211   ble(&return_left, Label::kNear);
212   b(&return_right, Label::kNear);
213 
214   bind(&check_zero);
215   lzdr(kDoubleRegZero);
216   cebr(left_reg, kDoubleRegZero);
217   /* left == right != 0. */
218   bne(&return_left, Label::kNear);
219   /* At this point, both left and right are either 0 or -0. */
220   /* N.B. The following works because +0 + -0 == +0 */
221   /* For min we want logical-or of sign bit: -(-L + -R) */
222   lcebr(left_reg, left_reg);
223   ldr(result_reg, left_reg);
224   if (left_reg == right_reg) {
225     aebr(result_reg, right_reg);
226   } else {
227     sebr(result_reg, right_reg);
228   }
229   lcebr(result_reg, result_reg);
230   b(&done, Label::kNear);
231 
232   bind(&return_nan);
233   /* If left or right are NaN, aebr propagates the appropriate one.*/
234   aebr(left_reg, right_reg);
235   b(&return_left, Label::kNear);
236 
237   bind(&return_right);
238   if (right_reg != result_reg) {
239     ldr(result_reg, right_reg);
240   }
241   b(&done, Label::kNear);
242 
243   bind(&return_left);
244   if (left_reg != result_reg) {
245     ldr(result_reg, left_reg);
246   }
247   bind(&done);
248 }
249 
CeilF32(DoubleRegister dst,DoubleRegister src)250 void TurboAssembler::CeilF32(DoubleRegister dst, DoubleRegister src) {
251   fiebra(ROUND_TOWARD_POS_INF, dst, src);
252 }
253 
CeilF64(DoubleRegister dst,DoubleRegister src)254 void TurboAssembler::CeilF64(DoubleRegister dst, DoubleRegister src) {
255   fidbra(ROUND_TOWARD_POS_INF, dst, src);
256 }
257 
FloorF32(DoubleRegister dst,DoubleRegister src)258 void TurboAssembler::FloorF32(DoubleRegister dst, DoubleRegister src) {
259   fiebra(ROUND_TOWARD_NEG_INF, dst, src);
260 }
261 
FloorF64(DoubleRegister dst,DoubleRegister src)262 void TurboAssembler::FloorF64(DoubleRegister dst, DoubleRegister src) {
263   fidbra(ROUND_TOWARD_NEG_INF, dst, src);
264 }
265 
TruncF32(DoubleRegister dst,DoubleRegister src)266 void TurboAssembler::TruncF32(DoubleRegister dst, DoubleRegister src) {
267   fiebra(ROUND_TOWARD_0, dst, src);
268 }
269 
TruncF64(DoubleRegister dst,DoubleRegister src)270 void TurboAssembler::TruncF64(DoubleRegister dst, DoubleRegister src) {
271   fidbra(ROUND_TOWARD_0, dst, src);
272 }
273 
NearestIntF32(DoubleRegister dst,DoubleRegister src)274 void TurboAssembler::NearestIntF32(DoubleRegister dst, DoubleRegister src) {
275   fiebra(ROUND_TO_NEAREST_TO_EVEN, dst, src);
276 }
277 
NearestIntF64(DoubleRegister dst,DoubleRegister src)278 void TurboAssembler::NearestIntF64(DoubleRegister dst, DoubleRegister src) {
279   fidbra(ROUND_TO_NEAREST_TO_EVEN, dst, src);
280 }
281 
RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,Register exclusion1,Register exclusion2,Register exclusion3) const282 int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
283                                                     Register exclusion1,
284                                                     Register exclusion2,
285                                                     Register exclusion3) const {
286   int bytes = 0;
287 
288   RegList exclusions = {exclusion1, exclusion2, exclusion3};
289   RegList list = kJSCallerSaved - exclusions;
290   bytes += list.Count() * kSystemPointerSize;
291 
292   if (fp_mode == SaveFPRegsMode::kSave) {
293     bytes += kStackSavedSavedFPSizeInBytes;
294   }
295 
296   return bytes;
297 }
298 
PushCallerSaved(SaveFPRegsMode fp_mode,Register scratch,Register exclusion1,Register exclusion2,Register exclusion3)299 int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register scratch,
300                                     Register exclusion1, Register exclusion2,
301                                     Register exclusion3) {
302   int bytes = 0;
303 
304   RegList exclusions = {exclusion1, exclusion2, exclusion3};
305   RegList list = kJSCallerSaved - exclusions;
306   MultiPush(list);
307   bytes += list.Count() * kSystemPointerSize;
308 
309   if (fp_mode == SaveFPRegsMode::kSave) {
310     MultiPushF64OrV128(kCallerSavedDoubles, scratch);
311     bytes += kStackSavedSavedFPSizeInBytes;
312   }
313 
314   return bytes;
315 }
316 
PopCallerSaved(SaveFPRegsMode fp_mode,Register scratch,Register exclusion1,Register exclusion2,Register exclusion3)317 int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register scratch,
318                                    Register exclusion1, Register exclusion2,
319                                    Register exclusion3) {
320   int bytes = 0;
321   if (fp_mode == SaveFPRegsMode::kSave) {
322     MultiPopF64OrV128(kCallerSavedDoubles, scratch);
323     bytes += kStackSavedSavedFPSizeInBytes;
324   }
325 
326   RegList exclusions = {exclusion1, exclusion2, exclusion3};
327   RegList list = kJSCallerSaved - exclusions;
328   MultiPop(list);
329   bytes += list.Count() * kSystemPointerSize;
330 
331   return bytes;
332 }
333 
LoadFromConstantsTable(Register destination,int constant_index)334 void TurboAssembler::LoadFromConstantsTable(Register destination,
335                                             int constant_index) {
336   DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable));
337 
338   const uint32_t offset = FixedArray::kHeaderSize +
339                           constant_index * kSystemPointerSize - kHeapObjectTag;
340 
341   CHECK(is_uint19(offset));
342   DCHECK_NE(destination, r0);
343   LoadRoot(destination, RootIndex::kBuiltinsConstantsTable);
344   LoadTaggedPointerField(
345       destination,
346       FieldMemOperand(destination,
347                       FixedArray::OffsetOfElementAt(constant_index)),
348       r1);
349 }
350 
LoadRootRelative(Register destination,int32_t offset)351 void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) {
352   LoadU64(destination, MemOperand(kRootRegister, offset));
353 }
354 
LoadRootRegisterOffset(Register destination,intptr_t offset)355 void TurboAssembler::LoadRootRegisterOffset(Register destination,
356                                             intptr_t offset) {
357   if (offset == 0) {
358     mov(destination, kRootRegister);
359   } else if (is_uint12(offset)) {
360     la(destination, MemOperand(kRootRegister, offset));
361   } else {
362     DCHECK(is_int20(offset));
363     lay(destination, MemOperand(kRootRegister, offset));
364   }
365 }
366 
Jump(Register target,Condition cond)367 void TurboAssembler::Jump(Register target, Condition cond) { b(cond, target); }
368 
Jump(intptr_t target,RelocInfo::Mode rmode,Condition cond)369 void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode,
370                           Condition cond) {
371   Label skip;
372 
373   if (cond != al) b(NegateCondition(cond), &skip);
374 
375   mov(ip, Operand(target, rmode));
376   b(ip);
377 
378   bind(&skip);
379 }
380 
Jump(Address target,RelocInfo::Mode rmode,Condition cond)381 void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode,
382                           Condition cond) {
383   DCHECK(!RelocInfo::IsCodeTarget(rmode));
384   Jump(static_cast<intptr_t>(target), rmode, cond);
385 }
386 
Jump(Handle<Code> code,RelocInfo::Mode rmode,Condition cond)387 void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
388                           Condition cond) {
389   DCHECK(RelocInfo::IsCodeTarget(rmode));
390   DCHECK_IMPLIES(options().isolate_independent_code,
391                  Builtins::IsIsolateIndependentBuiltin(*code));
392 
393   Builtin builtin = Builtin::kNoBuiltinId;
394   bool target_is_builtin =
395       isolate()->builtins()->IsBuiltinHandle(code, &builtin);
396 
397   if (options().inline_offheap_trampolines && target_is_builtin) {
398     // Inline the trampoline.
399     RecordCommentForOffHeapTrampoline(builtin);
400     mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
401     b(cond, ip);
402     return;
403   }
404   jump(code, RelocInfo::RELATIVE_CODE_TARGET, cond);
405 }
406 
Jump(const ExternalReference & reference)407 void TurboAssembler::Jump(const ExternalReference& reference) {
408   UseScratchRegisterScope temps(this);
409   Register scratch = temps.Acquire();
410   Move(scratch, reference);
411   Jump(scratch);
412 }
413 
Call(Register target)414 void TurboAssembler::Call(Register target) {
415   // Branch to target via indirect branch
416   basr(r14, target);
417 }
418 
CallJSEntry(Register target)419 void MacroAssembler::CallJSEntry(Register target) {
420   DCHECK(target == r4);
421   Call(target);
422 }
423 
CallSizeNotPredictableCodeSize(Address target,RelocInfo::Mode rmode,Condition cond)424 int MacroAssembler::CallSizeNotPredictableCodeSize(Address target,
425                                                    RelocInfo::Mode rmode,
426                                                    Condition cond) {
427   // S390 Assembler::move sequence is IILF / IIHF
428   int size;
429 #if V8_TARGET_ARCH_S390X
430   size = 14;  // IILF + IIHF + BASR
431 #else
432   size = 8;  // IILF + BASR
433 #endif
434   return size;
435 }
436 
Call(Address target,RelocInfo::Mode rmode,Condition cond)437 void TurboAssembler::Call(Address target, RelocInfo::Mode rmode,
438                           Condition cond) {
439   DCHECK(cond == al);
440 
441   mov(ip, Operand(target, rmode));
442   basr(r14, ip);
443 }
444 
Call(Handle<Code> code,RelocInfo::Mode rmode,Condition cond)445 void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
446                           Condition cond) {
447   DCHECK(RelocInfo::IsCodeTarget(rmode) && cond == al);
448 
449   DCHECK_IMPLIES(options().isolate_independent_code,
450                  Builtins::IsIsolateIndependentBuiltin(*code));
451   Builtin builtin = Builtin::kNoBuiltinId;
452   bool target_is_builtin =
453       isolate()->builtins()->IsBuiltinHandle(code, &builtin);
454 
455   if (target_is_builtin && options().inline_offheap_trampolines) {
456     // Inline the trampoline.
457     CallBuiltin(builtin);
458     return;
459   }
460   DCHECK(code->IsExecutable());
461   call(code, rmode);
462 }
463 
CallBuiltin(Builtin builtin)464 void TurboAssembler::CallBuiltin(Builtin builtin) {
465   ASM_CODE_COMMENT_STRING(this, CommentForOffHeapTrampoline("call", builtin));
466   DCHECK(Builtins::IsBuiltinId(builtin));
467   // Use ip directly instead of using UseScratchRegisterScope, as we do not
468   // preserve scratch registers across calls.
469   mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
470   Call(ip);
471 }
472 
TailCallBuiltin(Builtin builtin)473 void TurboAssembler::TailCallBuiltin(Builtin builtin) {
474   ASM_CODE_COMMENT_STRING(this,
475                           CommentForOffHeapTrampoline("tail call", builtin));
476   mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
477   b(ip);
478 }
479 
Drop(int count)480 void TurboAssembler::Drop(int count) {
481   if (count > 0) {
482     int total = count * kSystemPointerSize;
483     if (is_uint12(total)) {
484       la(sp, MemOperand(sp, total));
485     } else if (is_int20(total)) {
486       lay(sp, MemOperand(sp, total));
487     } else {
488       AddS64(sp, Operand(total));
489     }
490   }
491 }
492 
Drop(Register count,Register scratch)493 void TurboAssembler::Drop(Register count, Register scratch) {
494   ShiftLeftU64(scratch, count, Operand(kSystemPointerSizeLog2));
495   AddS64(sp, sp, scratch);
496 }
497 
Call(Label * target)498 void TurboAssembler::Call(Label* target) { b(r14, target); }
499 
Push(Handle<HeapObject> handle)500 void TurboAssembler::Push(Handle<HeapObject> handle) {
501   mov(r0, Operand(handle));
502   push(r0);
503 }
504 
Push(Smi smi)505 void TurboAssembler::Push(Smi smi) {
506   mov(r0, Operand(smi));
507   push(r0);
508 }
509 
Move(Register dst,Handle<HeapObject> value,RelocInfo::Mode rmode)510 void TurboAssembler::Move(Register dst, Handle<HeapObject> value,
511                           RelocInfo::Mode rmode) {
512   // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
513   // non-isolate-independent code. In many cases it might be cheaper than
514   // embedding the relocatable value.
515   if (root_array_available_ && options().isolate_independent_code) {
516     IndirectLoadConstant(dst, value);
517     return;
518   } else if (RelocInfo::IsCompressedEmbeddedObject(rmode)) {
519     EmbeddedObjectIndex index = AddEmbeddedObject(value);
520     DCHECK(is_uint32(index));
521     mov(dst, Operand(static_cast<int>(index), rmode));
522   } else {
523     DCHECK(RelocInfo::IsFullEmbeddedObject(rmode));
524     mov(dst, Operand(value.address(), rmode));
525   }
526 }
527 
Move(Register dst,ExternalReference reference)528 void TurboAssembler::Move(Register dst, ExternalReference reference) {
529   // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
530   // non-isolate-independent code. In many cases it might be cheaper than
531   // embedding the relocatable value.
532   if (root_array_available_ && options().isolate_independent_code) {
533     IndirectLoadExternalReference(dst, reference);
534     return;
535   }
536   mov(dst, Operand(reference));
537 }
538 
Move(Register dst,Register src,Condition cond)539 void TurboAssembler::Move(Register dst, Register src, Condition cond) {
540   if (dst != src) {
541     if (cond == al) {
542       mov(dst, src);
543     } else {
544       LoadOnConditionP(cond, dst, src);
545     }
546   }
547 }
548 
Move(DoubleRegister dst,DoubleRegister src)549 void TurboAssembler::Move(DoubleRegister dst, DoubleRegister src) {
550   if (dst != src) {
551     ldr(dst, src);
552   }
553 }
554 
Move(Register dst,const MemOperand & src)555 void TurboAssembler::Move(Register dst, const MemOperand& src) {
556   LoadU64(dst, src);
557 }
558 
559 // Wrapper around Assembler::mvc (SS-a format)
MoveChar(const MemOperand & opnd1,const MemOperand & opnd2,const Operand & length)560 void TurboAssembler::MoveChar(const MemOperand& opnd1, const MemOperand& opnd2,
561                               const Operand& length) {
562   mvc(opnd1, opnd2, Operand(static_cast<intptr_t>(length.immediate() - 1)));
563 }
564 
565 // Wrapper around Assembler::clc (SS-a format)
CompareLogicalChar(const MemOperand & opnd1,const MemOperand & opnd2,const Operand & length)566 void TurboAssembler::CompareLogicalChar(const MemOperand& opnd1,
567                                         const MemOperand& opnd2,
568                                         const Operand& length) {
569   clc(opnd1, opnd2, Operand(static_cast<intptr_t>(length.immediate() - 1)));
570 }
571 
572 // Wrapper around Assembler::xc (SS-a format)
ExclusiveOrChar(const MemOperand & opnd1,const MemOperand & opnd2,const Operand & length)573 void TurboAssembler::ExclusiveOrChar(const MemOperand& opnd1,
574                                      const MemOperand& opnd2,
575                                      const Operand& length) {
576   xc(opnd1, opnd2, Operand(static_cast<intptr_t>(length.immediate() - 1)));
577 }
578 
579 // Wrapper around Assembler::risbg(n) (RIE-f)
RotateInsertSelectBits(Register dst,Register src,const Operand & startBit,const Operand & endBit,const Operand & shiftAmt,bool zeroBits)580 void TurboAssembler::RotateInsertSelectBits(Register dst, Register src,
581                                             const Operand& startBit,
582                                             const Operand& endBit,
583                                             const Operand& shiftAmt,
584                                             bool zeroBits) {
585   if (zeroBits)
586     // High tag the top bit of I4/EndBit to zero out any unselected bits
587     risbg(dst, src, startBit,
588           Operand(static_cast<intptr_t>(endBit.immediate() | 0x80)), shiftAmt);
589   else
590     risbg(dst, src, startBit, endBit, shiftAmt);
591 }
592 
BranchRelativeOnIdxHighP(Register dst,Register inc,Label * L)593 void TurboAssembler::BranchRelativeOnIdxHighP(Register dst, Register inc,
594                                               Label* L) {
595 #if V8_TARGET_ARCH_S390X
596   brxhg(dst, inc, L);
597 #else
598   brxh(dst, inc, L);
599 #endif  // V8_TARGET_ARCH_S390X
600 }
601 
PushArray(Register array,Register size,Register scratch,Register scratch2,PushArrayOrder order)602 void TurboAssembler::PushArray(Register array, Register size, Register scratch,
603                                Register scratch2, PushArrayOrder order) {
604   Label loop, done;
605 
606   if (order == kNormal) {
607     ShiftLeftU64(scratch, size, Operand(kSystemPointerSizeLog2));
608     lay(scratch, MemOperand(array, scratch));
609     bind(&loop);
610     CmpS64(array, scratch);
611     bge(&done);
612     lay(scratch, MemOperand(scratch, -kSystemPointerSize));
613     lay(sp, MemOperand(sp, -kSystemPointerSize));
614     MoveChar(MemOperand(sp), MemOperand(scratch), Operand(kSystemPointerSize));
615     b(&loop);
616     bind(&done);
617   } else {
618     DCHECK_NE(scratch2, r0);
619     ShiftLeftU64(scratch, size, Operand(kSystemPointerSizeLog2));
620     lay(scratch, MemOperand(array, scratch));
621     mov(scratch2, array);
622     bind(&loop);
623     CmpS64(scratch2, scratch);
624     bge(&done);
625     lay(sp, MemOperand(sp, -kSystemPointerSize));
626     MoveChar(MemOperand(sp), MemOperand(scratch2), Operand(kSystemPointerSize));
627     lay(scratch2, MemOperand(scratch2, kSystemPointerSize));
628     b(&loop);
629     bind(&done);
630   }
631 }
632 
MultiPush(RegList regs,Register location)633 void TurboAssembler::MultiPush(RegList regs, Register location) {
634   int16_t num_to_push = regs.Count();
635   int16_t stack_offset = num_to_push * kSystemPointerSize;
636 
637   SubS64(location, location, Operand(stack_offset));
638   for (int16_t i = Register::kNumRegisters - 1; i >= 0; i--) {
639     if ((regs.bits() & (1 << i)) != 0) {
640       stack_offset -= kSystemPointerSize;
641       StoreU64(ToRegister(i), MemOperand(location, stack_offset));
642     }
643   }
644 }
645 
MultiPop(RegList regs,Register location)646 void TurboAssembler::MultiPop(RegList regs, Register location) {
647   int16_t stack_offset = 0;
648 
649   for (int16_t i = 0; i < Register::kNumRegisters; i++) {
650     if ((regs.bits() & (1 << i)) != 0) {
651       LoadU64(ToRegister(i), MemOperand(location, stack_offset));
652       stack_offset += kSystemPointerSize;
653     }
654   }
655   AddS64(location, location, Operand(stack_offset));
656 }
657 
MultiPushDoubles(DoubleRegList dregs,Register location)658 void TurboAssembler::MultiPushDoubles(DoubleRegList dregs, Register location) {
659   int16_t num_to_push = dregs.Count();
660   int16_t stack_offset = num_to_push * kDoubleSize;
661 
662   SubS64(location, location, Operand(stack_offset));
663   for (int16_t i = DoubleRegister::kNumRegisters - 1; i >= 0; i--) {
664     if ((dregs.bits() & (1 << i)) != 0) {
665       DoubleRegister dreg = DoubleRegister::from_code(i);
666       stack_offset -= kDoubleSize;
667       StoreF64(dreg, MemOperand(location, stack_offset));
668     }
669   }
670 }
671 
MultiPushV128(DoubleRegList dregs,Register scratch,Register location)672 void TurboAssembler::MultiPushV128(DoubleRegList dregs, Register scratch,
673                                    Register location) {
674   int16_t num_to_push = dregs.Count();
675   int16_t stack_offset = num_to_push * kSimd128Size;
676 
677   SubS64(location, location, Operand(stack_offset));
678   for (int16_t i = Simd128Register::kNumRegisters - 1; i >= 0; i--) {
679     if ((dregs.bits() & (1 << i)) != 0) {
680       Simd128Register dreg = Simd128Register::from_code(i);
681       stack_offset -= kSimd128Size;
682       StoreV128(dreg, MemOperand(location, stack_offset), scratch);
683     }
684   }
685 }
686 
MultiPopDoubles(DoubleRegList dregs,Register location)687 void TurboAssembler::MultiPopDoubles(DoubleRegList dregs, Register location) {
688   int16_t stack_offset = 0;
689 
690   for (int16_t i = 0; i < DoubleRegister::kNumRegisters; i++) {
691     if ((dregs.bits() & (1 << i)) != 0) {
692       DoubleRegister dreg = DoubleRegister::from_code(i);
693       LoadF64(dreg, MemOperand(location, stack_offset));
694       stack_offset += kDoubleSize;
695     }
696   }
697   AddS64(location, location, Operand(stack_offset));
698 }
699 
MultiPopV128(DoubleRegList dregs,Register scratch,Register location)700 void TurboAssembler::MultiPopV128(DoubleRegList dregs, Register scratch,
701                                   Register location) {
702   int16_t stack_offset = 0;
703 
704   for (int16_t i = 0; i < Simd128Register::kNumRegisters; i++) {
705     if ((dregs.bits() & (1 << i)) != 0) {
706       Simd128Register dreg = Simd128Register::from_code(i);
707       LoadV128(dreg, MemOperand(location, stack_offset), scratch);
708       stack_offset += kSimd128Size;
709     }
710   }
711   AddS64(location, location, Operand(stack_offset));
712 }
713 
MultiPushF64OrV128(DoubleRegList dregs,Register scratch,Register location)714 void TurboAssembler::MultiPushF64OrV128(DoubleRegList dregs, Register scratch,
715                                         Register location) {
716 #if V8_ENABLE_WEBASSEMBLY
717   bool generating_bultins =
718       isolate() && isolate()->IsGeneratingEmbeddedBuiltins();
719   if (generating_bultins) {
720     Label push_doubles, simd_pushed;
721     Move(r1, ExternalReference::supports_wasm_simd_128_address());
722     LoadU8(r1, MemOperand(r1));
723     LoadAndTestP(r1, r1);  // If > 0 then simd is available.
724     ble(&push_doubles, Label::kNear);
725     // Save vector registers, don't save double registers anymore.
726     MultiPushV128(dregs, scratch);
727     b(&simd_pushed);
728     bind(&push_doubles);
729     // Simd not supported, only save double registers.
730     MultiPushDoubles(dregs);
731     // We still need to allocate empty space on the stack as if
732     // Simd rgeisters were saved (see kFixedFrameSizeFromFp).
733     lay(sp, MemOperand(sp, -(dregs.Count() * kDoubleSize)));
734     bind(&simd_pushed);
735   } else {
736     if (CpuFeatures::SupportsWasmSimd128()) {
737       MultiPushV128(dregs, scratch);
738     } else {
739       MultiPushDoubles(dregs);
740       lay(sp, MemOperand(sp, -(dregs.Count() * kDoubleSize)));
741     }
742   }
743 #else
744   MultiPushDoubles(dregs);
745 #endif
746 }
747 
MultiPopF64OrV128(DoubleRegList dregs,Register scratch,Register location)748 void TurboAssembler::MultiPopF64OrV128(DoubleRegList dregs, Register scratch,
749                                        Register location) {
750 #if V8_ENABLE_WEBASSEMBLY
751   bool generating_bultins =
752       isolate() && isolate()->IsGeneratingEmbeddedBuiltins();
753   if (generating_bultins) {
754     Label pop_doubles, simd_popped;
755     Move(r1, ExternalReference::supports_wasm_simd_128_address());
756     LoadU8(r1, MemOperand(r1));
757     LoadAndTestP(r1, r1);  // If > 0 then simd is available.
758     ble(&pop_doubles, Label::kNear);
759     // Pop vector registers, don't pop double registers anymore.
760     MultiPopV128(dregs, scratch);
761     b(&simd_popped);
762     bind(&pop_doubles);
763     // Simd not supported, only pop double registers.
764     lay(sp, MemOperand(sp, dregs.Count() * kDoubleSize));
765     MultiPopDoubles(dregs);
766     bind(&simd_popped);
767   } else {
768     if (CpuFeatures::SupportsWasmSimd128()) {
769       MultiPopV128(dregs, scratch);
770     } else {
771       lay(sp, MemOperand(sp, dregs.Count() * kDoubleSize));
772       MultiPopDoubles(dregs);
773     }
774   }
775 #else
776   MultiPopDoubles(dregs);
777 #endif
778 }
779 
LoadRoot(Register destination,RootIndex index,Condition)780 void TurboAssembler::LoadRoot(Register destination, RootIndex index,
781                               Condition) {
782   LoadU64(destination,
783           MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)), r0);
784 }
785 
LoadTaggedPointerField(const Register & destination,const MemOperand & field_operand,const Register & scratch)786 void TurboAssembler::LoadTaggedPointerField(const Register& destination,
787                                             const MemOperand& field_operand,
788                                             const Register& scratch) {
789   if (COMPRESS_POINTERS_BOOL) {
790     DecompressTaggedPointer(destination, field_operand);
791   } else {
792     LoadU64(destination, field_operand, scratch);
793   }
794 }
795 
LoadAnyTaggedField(const Register & destination,const MemOperand & field_operand,const Register & scratch)796 void TurboAssembler::LoadAnyTaggedField(const Register& destination,
797                                         const MemOperand& field_operand,
798                                         const Register& scratch) {
799   if (COMPRESS_POINTERS_BOOL) {
800     DecompressAnyTagged(destination, field_operand);
801   } else {
802     LoadU64(destination, field_operand, scratch);
803   }
804 }
805 
SmiUntag(Register dst,const MemOperand & src)806 void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) {
807   if (SmiValuesAre31Bits()) {
808     LoadS32(dst, src);
809   } else {
810     LoadU64(dst, src);
811   }
812   SmiUntag(dst);
813 }
814 
SmiUntagField(Register dst,const MemOperand & src)815 void TurboAssembler::SmiUntagField(Register dst, const MemOperand& src) {
816   SmiUntag(dst, src);
817 }
818 
StoreTaggedField(const Register & value,const MemOperand & dst_field_operand,const Register & scratch)819 void TurboAssembler::StoreTaggedField(const Register& value,
820                                       const MemOperand& dst_field_operand,
821                                       const Register& scratch) {
822   if (COMPRESS_POINTERS_BOOL) {
823     RecordComment("[ StoreTagged");
824     StoreU32(value, dst_field_operand);
825     RecordComment("]");
826   } else {
827     StoreU64(value, dst_field_operand, scratch);
828   }
829 }
830 
DecompressTaggedSigned(Register destination,Register src)831 void TurboAssembler::DecompressTaggedSigned(Register destination,
832                                             Register src) {
833   RecordComment("[ DecompressTaggedSigned");
834   llgfr(destination, src);
835   RecordComment("]");
836 }
837 
DecompressTaggedSigned(Register destination,MemOperand field_operand)838 void TurboAssembler::DecompressTaggedSigned(Register destination,
839                                             MemOperand field_operand) {
840   RecordComment("[ DecompressTaggedSigned");
841   llgf(destination, field_operand);
842   RecordComment("]");
843 }
844 
DecompressTaggedPointer(Register destination,Register source)845 void TurboAssembler::DecompressTaggedPointer(Register destination,
846                                              Register source) {
847   RecordComment("[ DecompressTaggedPointer");
848   llgfr(destination, source);
849   agr(destination, kRootRegister);
850   RecordComment("]");
851 }
852 
DecompressTaggedPointer(Register destination,MemOperand field_operand)853 void TurboAssembler::DecompressTaggedPointer(Register destination,
854                                              MemOperand field_operand) {
855   RecordComment("[ DecompressTaggedPointer");
856   llgf(destination, field_operand);
857   agr(destination, kRootRegister);
858   RecordComment("]");
859 }
860 
DecompressAnyTagged(Register destination,MemOperand field_operand)861 void TurboAssembler::DecompressAnyTagged(Register destination,
862                                          MemOperand field_operand) {
863   RecordComment("[ DecompressAnyTagged");
864   llgf(destination, field_operand);
865   agr(destination, kRootRegister);
866   RecordComment("]");
867 }
868 
DecompressAnyTagged(Register destination,Register source)869 void TurboAssembler::DecompressAnyTagged(Register destination,
870                                          Register source) {
871   RecordComment("[ DecompressAnyTagged");
872   llgfr(destination, source);
873   agr(destination, kRootRegister);
874   RecordComment("]");
875 }
876 
LoadTaggedSignedField(Register destination,MemOperand field_operand)877 void TurboAssembler::LoadTaggedSignedField(Register destination,
878                                            MemOperand field_operand) {
879   if (COMPRESS_POINTERS_BOOL) {
880     DecompressTaggedSigned(destination, field_operand);
881   } else {
882     LoadU64(destination, field_operand);
883   }
884 }
885 
RecordWriteField(Register object,int offset,Register value,Register slot_address,LinkRegisterStatus lr_status,SaveFPRegsMode save_fp,RememberedSetAction remembered_set_action,SmiCheck smi_check)886 void MacroAssembler::RecordWriteField(Register object, int offset,
887                                       Register value, Register slot_address,
888                                       LinkRegisterStatus lr_status,
889                                       SaveFPRegsMode save_fp,
890                                       RememberedSetAction remembered_set_action,
891                                       SmiCheck smi_check) {
892   // First, check if a write barrier is even needed. The tests below
893   // catch stores of Smis.
894   Label done;
895 
896   // Skip barrier if writing a smi.
897   if (smi_check == SmiCheck::kInline) {
898     JumpIfSmi(value, &done);
899   }
900 
901   // Although the object register is tagged, the offset is relative to the start
902   // of the object, so so offset must be a multiple of kSystemPointerSize.
903   DCHECK(IsAligned(offset, kTaggedSize));
904 
905   lay(slot_address, MemOperand(object, offset - kHeapObjectTag));
906   if (FLAG_debug_code) {
907     Label ok;
908     AndP(r0, slot_address, Operand(kTaggedSize - 1));
909     beq(&ok, Label::kNear);
910     stop();
911     bind(&ok);
912   }
913 
914   RecordWrite(object, slot_address, value, lr_status, save_fp,
915               remembered_set_action, SmiCheck::kOmit);
916 
917   bind(&done);
918 
919   // Clobber clobbered input registers when running with the debug-code flag
920   // turned on to provoke errors.
921   if (FLAG_debug_code) {
922     mov(value, Operand(bit_cast<intptr_t>(kZapValue + 4)));
923     mov(slot_address, Operand(bit_cast<intptr_t>(kZapValue + 8)));
924   }
925 }
926 
MaybeSaveRegisters(RegList registers)927 void TurboAssembler::MaybeSaveRegisters(RegList registers) {
928   if (registers.is_empty()) return;
929   MultiPush(registers);
930 }
931 
MaybeRestoreRegisters(RegList registers)932 void TurboAssembler::MaybeRestoreRegisters(RegList registers) {
933   if (registers.is_empty()) return;
934   MultiPop(registers);
935 }
936 
CallEphemeronKeyBarrier(Register object,Register slot_address,SaveFPRegsMode fp_mode)937 void TurboAssembler::CallEphemeronKeyBarrier(Register object,
938                                              Register slot_address,
939                                              SaveFPRegsMode fp_mode) {
940   DCHECK(!AreAliased(object, slot_address));
941   RegList registers =
942       WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
943   MaybeSaveRegisters(registers);
944 
945   Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
946   Register slot_address_parameter =
947       WriteBarrierDescriptor::SlotAddressRegister();
948 
949   Push(object);
950   Push(slot_address);
951   Pop(slot_address_parameter);
952   Pop(object_parameter);
953 
954   Call(isolate()->builtins()->code_handle(
955            Builtins::GetEphemeronKeyBarrierStub(fp_mode)),
956        RelocInfo::CODE_TARGET);
957   MaybeRestoreRegisters(registers);
958 }
959 
CallRecordWriteStubSaveRegisters(Register object,Register slot_address,RememberedSetAction remembered_set_action,SaveFPRegsMode fp_mode,StubCallMode mode)960 void TurboAssembler::CallRecordWriteStubSaveRegisters(
961     Register object, Register slot_address,
962     RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
963     StubCallMode mode) {
964   DCHECK(!AreAliased(object, slot_address));
965   RegList registers =
966       WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
967   MaybeSaveRegisters(registers);
968 
969   Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
970   Register slot_address_parameter =
971       WriteBarrierDescriptor::SlotAddressRegister();
972 
973   Push(object);
974   Push(slot_address);
975   Pop(slot_address_parameter);
976   Pop(object_parameter);
977 
978   CallRecordWriteStub(object_parameter, slot_address_parameter,
979                       remembered_set_action, fp_mode, mode);
980 
981   MaybeRestoreRegisters(registers);
982 }
983 
CallRecordWriteStub(Register object,Register slot_address,RememberedSetAction remembered_set_action,SaveFPRegsMode fp_mode,StubCallMode mode)984 void TurboAssembler::CallRecordWriteStub(
985     Register object, Register slot_address,
986     RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
987     StubCallMode mode) {
988   // Use CallRecordWriteStubSaveRegisters if the object and slot registers
989   // need to be caller saved.
990   DCHECK_EQ(WriteBarrierDescriptor::ObjectRegister(), object);
991   DCHECK_EQ(WriteBarrierDescriptor::SlotAddressRegister(), slot_address);
992 #if V8_ENABLE_WEBASSEMBLY
993   if (mode == StubCallMode::kCallWasmRuntimeStub) {
994     auto wasm_target =
995         wasm::WasmCode::GetRecordWriteStub(remembered_set_action, fp_mode);
996     Call(wasm_target, RelocInfo::WASM_STUB_CALL);
997 #else
998   if (false) {
999 #endif
1000   } else {
1001     auto builtin_index =
1002         Builtins::GetRecordWriteStub(remembered_set_action, fp_mode);
1003     if (options().inline_offheap_trampolines) {
1004       RecordCommentForOffHeapTrampoline(builtin_index);
1005       mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
1006       Call(ip);
1007     } else {
1008       Handle<Code> code_target =
1009           isolate()->builtins()->code_handle(builtin_index);
1010       Call(code_target, RelocInfo::CODE_TARGET);
1011     }
1012   }
1013 }
1014 
1015 // Will clobber 4 registers: object, address, scratch, ip.  The
1016 // register 'object' contains a heap object pointer.  The heap object
1017 // tag is shifted away.
1018 void MacroAssembler::RecordWrite(Register object, Register slot_address,
1019                                  Register value, LinkRegisterStatus lr_status,
1020                                  SaveFPRegsMode fp_mode,
1021                                  RememberedSetAction remembered_set_action,
1022                                  SmiCheck smi_check) {
1023   DCHECK(!AreAliased(object, slot_address, value));
1024   if (FLAG_debug_code) {
1025     LoadTaggedPointerField(r0, MemOperand(slot_address));
1026     CmpS64(value, r0);
1027     Check(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite);
1028   }
1029 
1030   if ((remembered_set_action == RememberedSetAction::kOmit &&
1031        !FLAG_incremental_marking) ||
1032       FLAG_disable_write_barriers) {
1033     return;
1034   }
1035   // First, check if a write barrier is even needed. The tests below
1036   // catch stores of smis and stores into the young generation.
1037   Label done;
1038 
1039   if (smi_check == SmiCheck::kInline) {
1040     JumpIfSmi(value, &done);
1041   }
1042 
1043   CheckPageFlag(value,
1044                 value,  // Used as scratch.
1045                 MemoryChunk::kPointersToHereAreInterestingMask, eq, &done);
1046   CheckPageFlag(object,
1047                 value,  // Used as scratch.
1048                 MemoryChunk::kPointersFromHereAreInterestingMask, eq, &done);
1049 
1050   // Record the actual write.
1051   if (lr_status == kLRHasNotBeenSaved) {
1052     push(r14);
1053   }
1054   CallRecordWriteStubSaveRegisters(object, slot_address, remembered_set_action,
1055                                    fp_mode);
1056   if (lr_status == kLRHasNotBeenSaved) {
1057     pop(r14);
1058   }
1059 
1060   if (FLAG_debug_code) mov(slot_address, Operand(kZapValue));
1061 
1062   bind(&done);
1063 
1064   // Clobber clobbered registers when running with the debug-code flag
1065   // turned on to provoke errors.
1066   if (FLAG_debug_code) {
1067     mov(slot_address, Operand(bit_cast<intptr_t>(kZapValue + 12)));
1068     mov(value, Operand(bit_cast<intptr_t>(kZapValue + 16)));
1069   }
1070 }
1071 
1072 void TurboAssembler::PushCommonFrame(Register marker_reg) {
1073   ASM_CODE_COMMENT(this);
1074   int fp_delta = 0;
1075   CleanseP(r14);
1076   if (marker_reg.is_valid()) {
1077     Push(r14, fp, marker_reg);
1078     fp_delta = 1;
1079   } else {
1080     Push(r14, fp);
1081     fp_delta = 0;
1082   }
1083   la(fp, MemOperand(sp, fp_delta * kSystemPointerSize));
1084 }
1085 
1086 void TurboAssembler::PopCommonFrame(Register marker_reg) {
1087   if (marker_reg.is_valid()) {
1088     Pop(r14, fp, marker_reg);
1089   } else {
1090     Pop(r14, fp);
1091   }
1092 }
1093 
1094 void TurboAssembler::PushStandardFrame(Register function_reg) {
1095   int fp_delta = 0;
1096   CleanseP(r14);
1097   if (function_reg.is_valid()) {
1098     Push(r14, fp, cp, function_reg);
1099     fp_delta = 2;
1100   } else {
1101     Push(r14, fp, cp);
1102     fp_delta = 1;
1103   }
1104   la(fp, MemOperand(sp, fp_delta * kSystemPointerSize));
1105   Push(kJavaScriptCallArgCountRegister);
1106 }
1107 
1108 void TurboAssembler::RestoreFrameStateForTailCall() {
1109   // if (FLAG_enable_embedded_constant_pool) {
1110   //   LoadU64(kConstantPoolRegister,
1111   //         MemOperand(fp, StandardFrameConstants::kConstantPoolOffset));
1112   //   set_constant_pool_available(false);
1113   // }
1114   DCHECK(!FLAG_enable_embedded_constant_pool);
1115   LoadU64(r14, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
1116   LoadU64(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
1117 }
1118 
1119 void TurboAssembler::CanonicalizeNaN(const DoubleRegister dst,
1120                                      const DoubleRegister src) {
1121   // Turn potential sNaN into qNaN
1122   if (dst != src) ldr(dst, src);
1123   lzdr(kDoubleRegZero);
1124   sdbr(dst, kDoubleRegZero);
1125 }
1126 
1127 void TurboAssembler::ConvertIntToDouble(DoubleRegister dst, Register src) {
1128   cdfbr(dst, src);
1129 }
1130 
1131 void TurboAssembler::ConvertUnsignedIntToDouble(DoubleRegister dst,
1132                                                 Register src) {
1133   if (CpuFeatures::IsSupported(FLOATING_POINT_EXT)) {
1134     cdlfbr(Condition(5), Condition(0), dst, src);
1135   } else {
1136     // zero-extend src
1137     llgfr(src, src);
1138     // convert to double
1139     cdgbr(dst, src);
1140   }
1141 }
1142 
1143 void TurboAssembler::ConvertIntToFloat(DoubleRegister dst, Register src) {
1144   cefbra(Condition(4), dst, src);
1145 }
1146 
1147 void TurboAssembler::ConvertUnsignedIntToFloat(DoubleRegister dst,
1148                                                Register src) {
1149   celfbr(Condition(4), Condition(0), dst, src);
1150 }
1151 
1152 void TurboAssembler::ConvertInt64ToFloat(DoubleRegister double_dst,
1153                                          Register src) {
1154   cegbr(double_dst, src);
1155 }
1156 
1157 void TurboAssembler::ConvertInt64ToDouble(DoubleRegister double_dst,
1158                                           Register src) {
1159   cdgbr(double_dst, src);
1160 }
1161 
1162 void TurboAssembler::ConvertUnsignedInt64ToFloat(DoubleRegister double_dst,
1163                                                  Register src) {
1164   celgbr(Condition(0), Condition(0), double_dst, src);
1165 }
1166 
1167 void TurboAssembler::ConvertUnsignedInt64ToDouble(DoubleRegister double_dst,
1168                                                   Register src) {
1169   cdlgbr(Condition(0), Condition(0), double_dst, src);
1170 }
1171 
1172 void TurboAssembler::ConvertFloat32ToInt64(const Register dst,
1173                                            const DoubleRegister double_input,
1174                                            FPRoundingMode rounding_mode) {
1175   Condition m = Condition(0);
1176   switch (rounding_mode) {
1177     case kRoundToZero:
1178       m = Condition(5);
1179       break;
1180     case kRoundToNearest:
1181       UNIMPLEMENTED();
1182     case kRoundToPlusInf:
1183       m = Condition(6);
1184       break;
1185     case kRoundToMinusInf:
1186       m = Condition(7);
1187       break;
1188     default:
1189       UNIMPLEMENTED();
1190   }
1191   cgebr(m, dst, double_input);
1192 }
1193 
1194 void TurboAssembler::ConvertDoubleToInt64(const Register dst,
1195                                           const DoubleRegister double_input,
1196                                           FPRoundingMode rounding_mode) {
1197   Condition m = Condition(0);
1198   switch (rounding_mode) {
1199     case kRoundToZero:
1200       m = Condition(5);
1201       break;
1202     case kRoundToNearest:
1203       UNIMPLEMENTED();
1204     case kRoundToPlusInf:
1205       m = Condition(6);
1206       break;
1207     case kRoundToMinusInf:
1208       m = Condition(7);
1209       break;
1210     default:
1211       UNIMPLEMENTED();
1212   }
1213   cgdbr(m, dst, double_input);
1214 }
1215 
1216 void TurboAssembler::ConvertDoubleToInt32(const Register dst,
1217                                           const DoubleRegister double_input,
1218                                           FPRoundingMode rounding_mode) {
1219   Condition m = Condition(0);
1220   switch (rounding_mode) {
1221     case kRoundToZero:
1222       m = Condition(5);
1223       break;
1224     case kRoundToNearest:
1225       m = Condition(4);
1226       break;
1227     case kRoundToPlusInf:
1228       m = Condition(6);
1229       break;
1230     case kRoundToMinusInf:
1231       m = Condition(7);
1232       break;
1233     default:
1234       UNIMPLEMENTED();
1235   }
1236 #ifdef V8_TARGET_ARCH_S390X
1237   lghi(dst, Operand::Zero());
1238 #endif
1239   cfdbr(m, dst, double_input);
1240 }
1241 
1242 void TurboAssembler::ConvertFloat32ToInt32(const Register result,
1243                                            const DoubleRegister double_input,
1244                                            FPRoundingMode rounding_mode) {
1245   Condition m = Condition(0);
1246   switch (rounding_mode) {
1247     case kRoundToZero:
1248       m = Condition(5);
1249       break;
1250     case kRoundToNearest:
1251       m = Condition(4);
1252       break;
1253     case kRoundToPlusInf:
1254       m = Condition(6);
1255       break;
1256     case kRoundToMinusInf:
1257       m = Condition(7);
1258       break;
1259     default:
1260       UNIMPLEMENTED();
1261   }
1262 #ifdef V8_TARGET_ARCH_S390X
1263   lghi(result, Operand::Zero());
1264 #endif
1265   cfebr(m, result, double_input);
1266 }
1267 
1268 void TurboAssembler::ConvertFloat32ToUnsignedInt32(
1269     const Register result, const DoubleRegister double_input,
1270     FPRoundingMode rounding_mode) {
1271   Condition m = Condition(0);
1272   switch (rounding_mode) {
1273     case kRoundToZero:
1274       m = Condition(5);
1275       break;
1276     case kRoundToNearest:
1277       UNIMPLEMENTED();
1278     case kRoundToPlusInf:
1279       m = Condition(6);
1280       break;
1281     case kRoundToMinusInf:
1282       m = Condition(7);
1283       break;
1284     default:
1285       UNIMPLEMENTED();
1286   }
1287 #ifdef V8_TARGET_ARCH_S390X
1288   lghi(result, Operand::Zero());
1289 #endif
1290   clfebr(m, Condition(0), result, double_input);
1291 }
1292 
1293 void TurboAssembler::ConvertFloat32ToUnsignedInt64(
1294     const Register result, const DoubleRegister double_input,
1295     FPRoundingMode rounding_mode) {
1296   Condition m = Condition(0);
1297   switch (rounding_mode) {
1298     case kRoundToZero:
1299       m = Condition(5);
1300       break;
1301     case kRoundToNearest:
1302       UNIMPLEMENTED();
1303     case kRoundToPlusInf:
1304       m = Condition(6);
1305       break;
1306     case kRoundToMinusInf:
1307       m = Condition(7);
1308       break;
1309     default:
1310       UNIMPLEMENTED();
1311   }
1312   clgebr(m, Condition(0), result, double_input);
1313 }
1314 
1315 void TurboAssembler::ConvertDoubleToUnsignedInt64(
1316     const Register dst, const DoubleRegister double_input,
1317     FPRoundingMode rounding_mode) {
1318   Condition m = Condition(0);
1319   switch (rounding_mode) {
1320     case kRoundToZero:
1321       m = Condition(5);
1322       break;
1323     case kRoundToNearest:
1324       UNIMPLEMENTED();
1325     case kRoundToPlusInf:
1326       m = Condition(6);
1327       break;
1328     case kRoundToMinusInf:
1329       m = Condition(7);
1330       break;
1331     default:
1332       UNIMPLEMENTED();
1333   }
1334   clgdbr(m, Condition(0), dst, double_input);
1335 }
1336 
1337 void TurboAssembler::ConvertDoubleToUnsignedInt32(
1338     const Register dst, const DoubleRegister double_input,
1339     FPRoundingMode rounding_mode) {
1340   Condition m = Condition(0);
1341   switch (rounding_mode) {
1342     case kRoundToZero:
1343       m = Condition(5);
1344       break;
1345     case kRoundToNearest:
1346       UNIMPLEMENTED();
1347     case kRoundToPlusInf:
1348       m = Condition(6);
1349       break;
1350     case kRoundToMinusInf:
1351       m = Condition(7);
1352       break;
1353     default:
1354       UNIMPLEMENTED();
1355   }
1356 #ifdef V8_TARGET_ARCH_S390X
1357   lghi(dst, Operand::Zero());
1358 #endif
1359   clfdbr(m, Condition(0), dst, double_input);
1360 }
1361 
1362 void TurboAssembler::MovDoubleToInt64(Register dst, DoubleRegister src) {
1363   lgdr(dst, src);
1364 }
1365 
1366 void TurboAssembler::MovInt64ToDouble(DoubleRegister dst, Register src) {
1367   ldgr(dst, src);
1368 }
1369 
1370 void TurboAssembler::StubPrologue(StackFrame::Type type, Register base,
1371                                   int prologue_offset) {
1372   {
1373     ConstantPoolUnavailableScope constant_pool_unavailable(this);
1374     mov(r1, Operand(StackFrame::TypeToMarker(type)));
1375     PushCommonFrame(r1);
1376   }
1377 }
1378 
1379 void TurboAssembler::Prologue(Register base, int prologue_offset) {
1380   DCHECK(base != no_reg);
1381   PushStandardFrame(r3);
1382 }
1383 
1384 void TurboAssembler::DropArguments(Register count, ArgumentsCountType type,
1385                                    ArgumentsCountMode mode) {
1386   int receiver_bytes =
1387       (mode == kCountExcludesReceiver) ? kSystemPointerSize : 0;
1388   switch (type) {
1389     case kCountIsInteger: {
1390       ShiftLeftU64(ip, count, Operand(kSystemPointerSizeLog2));
1391       lay(sp, MemOperand(sp, ip));
1392       break;
1393     }
1394     case kCountIsSmi: {
1395       STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0);
1396       SmiToPtrArrayOffset(count, count);
1397       AddS64(sp, sp, count);
1398       break;
1399     }
1400     case kCountIsBytes: {
1401       AddS64(sp, sp, count);
1402       break;
1403     }
1404   }
1405   if (receiver_bytes != 0) {
1406     AddS64(sp, sp, Operand(receiver_bytes));
1407   }
1408 }
1409 
1410 void TurboAssembler::DropArgumentsAndPushNewReceiver(Register argc,
1411                                                      Register receiver,
1412                                                      ArgumentsCountType type,
1413                                                      ArgumentsCountMode mode) {
1414   DCHECK(!AreAliased(argc, receiver));
1415   if (mode == kCountExcludesReceiver) {
1416     // Drop arguments without receiver and override old receiver.
1417     DropArguments(argc, type, kCountIncludesReceiver);
1418     StoreU64(receiver, MemOperand(sp));
1419   } else {
1420     DropArguments(argc, type, mode);
1421     push(receiver);
1422   }
1423 }
1424 
1425 void TurboAssembler::EnterFrame(StackFrame::Type type,
1426                                 bool load_constant_pool_pointer_reg) {
1427   ASM_CODE_COMMENT(this);
1428   // We create a stack frame with:
1429   //    Return Addr <-- old sp
1430   //    Old FP      <-- new fp
1431   //    CP
1432   //    type
1433   //    CodeObject  <-- new sp
1434 
1435   Register scratch = no_reg;
1436   if (!StackFrame::IsJavaScript(type)) {
1437     scratch = ip;
1438     mov(scratch, Operand(StackFrame::TypeToMarker(type)));
1439   }
1440   PushCommonFrame(scratch);
1441 #if V8_ENABLE_WEBASSEMBLY
1442   if (type == StackFrame::WASM) Push(kWasmInstanceRegister);
1443 #endif  // V8_ENABLE_WEBASSEMBLY
1444 }
1445 
1446 int TurboAssembler::LeaveFrame(StackFrame::Type type, int stack_adjustment) {
1447   ASM_CODE_COMMENT(this);
1448   // Drop the execution stack down to the frame pointer and restore
1449   // the caller frame pointer, return address and constant pool pointer.
1450   LoadU64(r14, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
1451   if (is_int20(StandardFrameConstants::kCallerSPOffset + stack_adjustment)) {
1452     lay(r1, MemOperand(fp, StandardFrameConstants::kCallerSPOffset +
1453                                stack_adjustment));
1454   } else {
1455     AddS64(r1, fp,
1456            Operand(StandardFrameConstants::kCallerSPOffset + stack_adjustment));
1457   }
1458   LoadU64(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
1459   mov(sp, r1);
1460   int frame_ends = pc_offset();
1461   return frame_ends;
1462 }
1463 
1464 // ExitFrame layout (probably wrongish.. needs updating)
1465 //
1466 //  SP -> previousSP
1467 //        LK reserved
1468 //        sp_on_exit (for debug?)
1469 // oldSP->prev SP
1470 //        LK
1471 //        <parameters on stack>
1472 
1473 // Prior to calling EnterExitFrame, we've got a bunch of parameters
1474 // on the stack that we need to wrap a real frame around.. so first
1475 // we reserve a slot for LK and push the previous SP which is captured
1476 // in the fp register (r11)
1477 // Then - we buy a new frame
1478 
1479 // r14
1480 // oldFP <- newFP
1481 // SP
1482 // Floats
1483 // gaps
1484 // Args
1485 // ABIRes <- newSP
1486 void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space,
1487                                     StackFrame::Type frame_type) {
1488   DCHECK(frame_type == StackFrame::EXIT ||
1489          frame_type == StackFrame::BUILTIN_EXIT);
1490   // Set up the frame structure on the stack.
1491   DCHECK_EQ(2 * kSystemPointerSize, ExitFrameConstants::kCallerSPDisplacement);
1492   DCHECK_EQ(1 * kSystemPointerSize, ExitFrameConstants::kCallerPCOffset);
1493   DCHECK_EQ(0 * kSystemPointerSize, ExitFrameConstants::kCallerFPOffset);
1494   DCHECK_GT(stack_space, 0);
1495 
1496   // This is an opportunity to build a frame to wrap
1497   // all of the pushes that have happened inside of V8
1498   // since we were called from C code
1499   CleanseP(r14);
1500   mov(r1, Operand(StackFrame::TypeToMarker(frame_type)));
1501   PushCommonFrame(r1);
1502   // Reserve room for saved entry sp.
1503   lay(sp, MemOperand(fp, -ExitFrameConstants::kFixedFrameSizeFromFp));
1504 
1505   if (FLAG_debug_code) {
1506     StoreU64(MemOperand(fp, ExitFrameConstants::kSPOffset), Operand::Zero(),
1507              r1);
1508   }
1509 
1510   // Save the frame pointer and the context in top.
1511   Move(r1, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
1512                                      isolate()));
1513   StoreU64(fp, MemOperand(r1));
1514   Move(r1,
1515        ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
1516   StoreU64(cp, MemOperand(r1));
1517 
1518   // Optionally save all volatile double registers.
1519   if (save_doubles) {
1520     MultiPushDoubles(kCallerSavedDoubles);
1521     // Note that d0 will be accessible at
1522     //   fp - ExitFrameConstants::kFrameSize -
1523     //   kNumCallerSavedDoubles * kDoubleSize,
1524     // since the sp slot and code slot were pushed after the fp.
1525   }
1526 
1527   lay(sp, MemOperand(sp, -stack_space * kSystemPointerSize));
1528 
1529   // Allocate and align the frame preparing for calling the runtime
1530   // function.
1531   const int frame_alignment = TurboAssembler::ActivationFrameAlignment();
1532   if (frame_alignment > 0) {
1533     DCHECK_EQ(frame_alignment, 8);
1534     ClearRightImm(sp, sp, Operand(3));  // equivalent to &= -8
1535   }
1536 
1537   lay(sp, MemOperand(sp, -kNumRequiredStackFrameSlots * kSystemPointerSize));
1538   StoreU64(MemOperand(sp), Operand::Zero(), r0);
1539   // Set the exit frame sp value to point just before the return address
1540   // location.
1541   lay(r1, MemOperand(sp, kStackFrameSPSlot * kSystemPointerSize));
1542   StoreU64(r1, MemOperand(fp, ExitFrameConstants::kSPOffset));
1543 }
1544 
1545 int TurboAssembler::ActivationFrameAlignment() {
1546 #if !defined(USE_SIMULATOR)
1547   // Running on the real platform. Use the alignment as mandated by the local
1548   // environment.
1549   // Note: This will break if we ever start generating snapshots on one S390
1550   // platform for another S390 platform with a different alignment.
1551   return base::OS::ActivationFrameAlignment();
1552 #else  // Simulated
1553   // If we are using the simulator then we should always align to the expected
1554   // alignment. As the simulator is used to generate snapshots we do not know
1555   // if the target platform will need alignment, so this is controlled from a
1556   // flag.
1557   return FLAG_sim_stack_alignment;
1558 #endif
1559 }
1560 
1561 void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count,
1562                                     bool argument_count_is_length) {
1563   // Optionally restore all double registers.
1564   if (save_doubles) {
1565     // Calculate the stack location of the saved doubles and restore them.
1566     const int kNumRegs = kNumCallerSavedDoubles;
1567     lay(r5, MemOperand(fp, -(ExitFrameConstants::kFixedFrameSizeFromFp +
1568                              kNumRegs * kDoubleSize)));
1569     MultiPopDoubles(kCallerSavedDoubles, r5);
1570   }
1571 
1572   // Clear top frame.
1573   Move(ip, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
1574                                      isolate()));
1575   StoreU64(MemOperand(ip), Operand(0, RelocInfo::NO_INFO), r0);
1576 
1577   // Restore current context from top and clear it in debug mode.
1578   Move(ip,
1579        ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
1580   LoadU64(cp, MemOperand(ip));
1581 
1582 #ifdef DEBUG
1583   mov(r1, Operand(Context::kInvalidContext));
1584   Move(ip,
1585        ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
1586   StoreU64(r1, MemOperand(ip));
1587 #endif
1588 
1589   // Tear down the exit frame, pop the arguments, and return.
1590   LeaveFrame(StackFrame::EXIT);
1591 
1592   if (argument_count.is_valid()) {
1593     if (!argument_count_is_length) {
1594       ShiftLeftU64(argument_count, argument_count,
1595                    Operand(kSystemPointerSizeLog2));
1596     }
1597     la(sp, MemOperand(sp, argument_count));
1598   }
1599 }
1600 
1601 void TurboAssembler::MovFromFloatResult(const DoubleRegister dst) {
1602   Move(dst, d0);
1603 }
1604 
1605 void TurboAssembler::MovFromFloatParameter(const DoubleRegister dst) {
1606   Move(dst, d0);
1607 }
1608 
1609 MemOperand MacroAssembler::StackLimitAsMemOperand(StackLimitKind kind) {
1610   DCHECK(root_array_available());
1611   Isolate* isolate = this->isolate();
1612   ExternalReference limit =
1613       kind == StackLimitKind::kRealStackLimit
1614           ? ExternalReference::address_of_real_jslimit(isolate)
1615           : ExternalReference::address_of_jslimit(isolate);
1616   DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));
1617 
1618   intptr_t offset =
1619       TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
1620   CHECK(is_int32(offset));
1621   return MemOperand(kRootRegister, offset);
1622 }
1623 
1624 void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch,
1625                                         Label* stack_overflow) {
1626   // Check the stack for overflow. We are not trying to catch
1627   // interruptions (e.g. debug break and preemption) here, so the "real stack
1628   // limit" is checked.
1629   LoadU64(scratch, StackLimitAsMemOperand(StackLimitKind::kRealStackLimit));
1630   // Make scratch the space we have left. The stack might already be overflowed
1631   // here which will cause scratch to become negative.
1632   SubS64(scratch, sp, scratch);
1633   // Check if the arguments will overflow the stack.
1634   ShiftLeftU64(r0, num_args, Operand(kSystemPointerSizeLog2));
1635   CmpS64(scratch, r0);
1636   ble(stack_overflow);  // Signed comparison.
1637 }
1638 
1639 void MacroAssembler::InvokePrologue(Register expected_parameter_count,
1640                                     Register actual_parameter_count,
1641                                     Label* done, InvokeType type) {
1642   Label regular_invoke;
1643 
1644   //  r2: actual arguments count
1645   //  r3: function (passed through to callee)
1646   //  r4: expected arguments count
1647 
1648   DCHECK_EQ(actual_parameter_count, r2);
1649   DCHECK_EQ(expected_parameter_count, r4);
1650 
1651   // If the expected parameter count is equal to the adaptor sentinel, no need
1652   // to push undefined value as arguments.
1653   if (kDontAdaptArgumentsSentinel != 0) {
1654     CmpS64(expected_parameter_count, Operand(kDontAdaptArgumentsSentinel));
1655     beq(&regular_invoke);
1656   }
1657 
1658   // If overapplication or if the actual argument count is equal to the
1659   // formal parameter count, no need to push extra undefined values.
1660   SubS64(expected_parameter_count, expected_parameter_count,
1661          actual_parameter_count);
1662   ble(&regular_invoke);
1663 
1664   Label stack_overflow;
1665   Register scratch = r6;
1666   StackOverflowCheck(expected_parameter_count, scratch, &stack_overflow);
1667 
1668   // Underapplication. Move the arguments already in the stack, including the
1669   // receiver and the return address.
1670   {
1671     Label copy, check;
1672     Register num = r7, src = r8, dest = ip;  // r7 and r8 are context and root.
1673     mov(src, sp);
1674     // Update stack pointer.
1675     ShiftLeftU64(scratch, expected_parameter_count,
1676                  Operand(kSystemPointerSizeLog2));
1677     SubS64(sp, sp, scratch);
1678     mov(dest, sp);
1679     ltgr(num, actual_parameter_count);
1680     b(&check);
1681     bind(&copy);
1682     LoadU64(r0, MemOperand(src));
1683     lay(src, MemOperand(src, kSystemPointerSize));
1684     StoreU64(r0, MemOperand(dest));
1685     lay(dest, MemOperand(dest, kSystemPointerSize));
1686     SubS64(num, num, Operand(1));
1687     bind(&check);
1688     b(gt, &copy);
1689   }
1690 
1691   // Fill remaining expected arguments with undefined values.
1692   LoadRoot(scratch, RootIndex::kUndefinedValue);
1693   {
1694     Label loop;
1695     bind(&loop);
1696     StoreU64(scratch, MemOperand(ip));
1697     lay(ip, MemOperand(ip, kSystemPointerSize));
1698     SubS64(expected_parameter_count, expected_parameter_count, Operand(1));
1699     bgt(&loop);
1700   }
1701   b(&regular_invoke);
1702 
1703   bind(&stack_overflow);
1704   {
1705     FrameScope frame(
1706         this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
1707     CallRuntime(Runtime::kThrowStackOverflow);
1708     bkpt(0);
1709   }
1710 
1711   bind(&regular_invoke);
1712 }
1713 
1714 void MacroAssembler::CheckDebugHook(Register fun, Register new_target,
1715                                     Register expected_parameter_count,
1716                                     Register actual_parameter_count) {
1717   Label skip_hook;
1718 
1719   ExternalReference debug_hook_active =
1720       ExternalReference::debug_hook_on_function_call_address(isolate());
1721   Move(r6, debug_hook_active);
1722   tm(MemOperand(r6), Operand(0xFF));
1723   beq(&skip_hook);
1724 
1725   {
1726     // Load receiver to pass it later to DebugOnFunctionCall hook.
1727     LoadReceiver(r6, actual_parameter_count);
1728     FrameScope frame(
1729         this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
1730 
1731     SmiTag(expected_parameter_count);
1732     Push(expected_parameter_count);
1733 
1734     SmiTag(actual_parameter_count);
1735     Push(actual_parameter_count);
1736 
1737     if (new_target.is_valid()) {
1738       Push(new_target);
1739     }
1740     Push(fun, fun, r6);
1741     CallRuntime(Runtime::kDebugOnFunctionCall);
1742     Pop(fun);
1743     if (new_target.is_valid()) {
1744       Pop(new_target);
1745     }
1746 
1747     Pop(actual_parameter_count);
1748     SmiUntag(actual_parameter_count);
1749 
1750     Pop(expected_parameter_count);
1751     SmiUntag(expected_parameter_count);
1752   }
1753   bind(&skip_hook);
1754 }
1755 
1756 void MacroAssembler::InvokeFunctionCode(Register function, Register new_target,
1757                                         Register expected_parameter_count,
1758                                         Register actual_parameter_count,
1759                                         InvokeType type) {
1760   // You can't call a function without a valid frame.
1761   DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
1762   DCHECK_EQ(function, r3);
1763   DCHECK_IMPLIES(new_target.is_valid(), new_target == r5);
1764 
1765   // On function call, call into the debugger if necessary.
1766   CheckDebugHook(function, new_target, expected_parameter_count,
1767                  actual_parameter_count);
1768 
1769   // Clear the new.target register if not given.
1770   if (!new_target.is_valid()) {
1771     LoadRoot(r5, RootIndex::kUndefinedValue);
1772   }
1773 
1774   Label done;
1775   InvokePrologue(expected_parameter_count, actual_parameter_count, &done, type);
1776   // We call indirectly through the code field in the function to
1777   // allow recompilation to take effect without changing any of the
1778   // call sites.
1779   Register code = kJavaScriptCallCodeStartRegister;
1780   LoadTaggedPointerField(code,
1781                          FieldMemOperand(function, JSFunction::kCodeOffset));
1782   switch (type) {
1783     case InvokeType::kCall:
1784       CallCodeObject(code);
1785       break;
1786     case InvokeType::kJump:
1787       JumpCodeObject(code);
1788       break;
1789   }
1790   // Continue here if InvokePrologue does handle the invocation due to
1791   // mismatched parameter counts.
1792   bind(&done);
1793 }
1794 
1795 void MacroAssembler::InvokeFunctionWithNewTarget(
1796     Register fun, Register new_target, Register actual_parameter_count,
1797     InvokeType type) {
1798   // You can't call a function without a valid frame.
1799   DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
1800 
1801   // Contract with called JS functions requires that function is passed in r3.
1802   DCHECK_EQ(fun, r3);
1803 
1804   Register expected_reg = r4;
1805   Register temp_reg = r6;
1806   LoadTaggedPointerField(cp, FieldMemOperand(fun, JSFunction::kContextOffset));
1807   LoadTaggedPointerField(
1808       temp_reg, FieldMemOperand(fun, JSFunction::kSharedFunctionInfoOffset));
1809   LoadU16(
1810       expected_reg,
1811       FieldMemOperand(temp_reg,
1812                       SharedFunctionInfo::kFormalParameterCountOffset));
1813 
1814   InvokeFunctionCode(fun, new_target, expected_reg, actual_parameter_count,
1815                      type);
1816 }
1817 
1818 void MacroAssembler::InvokeFunction(Register function,
1819                                     Register expected_parameter_count,
1820                                     Register actual_parameter_count,
1821                                     InvokeType type) {
1822   // You can't call a function without a valid frame.
1823   DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
1824 
1825   // Contract with called JS functions requires that function is passed in r3.
1826   DCHECK_EQ(function, r3);
1827 
1828   // Get the function and setup the context.
1829   LoadTaggedPointerField(cp,
1830                          FieldMemOperand(function, JSFunction::kContextOffset));
1831 
1832   InvokeFunctionCode(r3, no_reg, expected_parameter_count,
1833                      actual_parameter_count, type);
1834 }
1835 
1836 void MacroAssembler::PushStackHandler() {
1837   // Adjust this code if not the case.
1838   STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kSystemPointerSize);
1839   STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kSystemPointerSize);
1840 
1841   // Link the current handler as the next handler.
1842   Move(r7,
1843        ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
1844 
1845   // Buy the full stack frame for 5 slots.
1846   lay(sp, MemOperand(sp, -StackHandlerConstants::kSize));
1847 
1848   // Store padding.
1849   lghi(r0, Operand::Zero());
1850   StoreU64(r0, MemOperand(sp));  // Padding.
1851 
1852   // Copy the old handler into the next handler slot.
1853   MoveChar(MemOperand(sp, StackHandlerConstants::kNextOffset), MemOperand(r7),
1854            Operand(kSystemPointerSize));
1855   // Set this new handler as the current one.
1856   StoreU64(sp, MemOperand(r7));
1857 }
1858 
1859 void MacroAssembler::PopStackHandler() {
1860   STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kSystemPointerSize);
1861   STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0);
1862 
1863   // Pop the Next Handler into r3 and store it into Handler Address reference.
1864   Pop(r3);
1865   Move(ip,
1866        ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
1867   StoreU64(r3, MemOperand(ip));
1868 
1869   Drop(1);  // Drop padding.
1870 }
1871 
1872 void MacroAssembler::CompareObjectType(Register object, Register map,
1873                                        Register type_reg, InstanceType type) {
1874   const Register temp = type_reg == no_reg ? r0 : type_reg;
1875 
1876   LoadMap(map, object);
1877   CompareInstanceType(map, temp, type);
1878 }
1879 
1880 void MacroAssembler::CompareInstanceType(Register map, Register type_reg,
1881                                          InstanceType type) {
1882   STATIC_ASSERT(Map::kInstanceTypeOffset < 4096);
1883   STATIC_ASSERT(LAST_TYPE <= 0xFFFF);
1884   LoadS16(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
1885   CmpS64(type_reg, Operand(type));
1886 }
1887 
1888 void MacroAssembler::CompareRange(Register value, unsigned lower_limit,
1889                                   unsigned higher_limit) {
1890   ASM_CODE_COMMENT(this);
1891   DCHECK_LT(lower_limit, higher_limit);
1892   if (lower_limit != 0) {
1893     UseScratchRegisterScope temps(this);
1894     Register scratch = temps.Acquire();
1895     mov(scratch, value);
1896     slgfi(scratch, Operand(lower_limit));
1897     CmpU64(scratch, Operand(higher_limit - lower_limit));
1898   } else {
1899     CmpU64(value, Operand(higher_limit));
1900   }
1901 }
1902 
1903 void MacroAssembler::CompareInstanceTypeRange(Register map, Register type_reg,
1904                                               InstanceType lower_limit,
1905                                               InstanceType higher_limit) {
1906   DCHECK_LT(lower_limit, higher_limit);
1907   LoadU16(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
1908   CompareRange(type_reg, lower_limit, higher_limit);
1909 }
1910 
1911 void MacroAssembler::CompareRoot(Register obj, RootIndex index) {
1912   int32_t offset = RootRegisterOffsetForRootIndex(index);
1913 #ifdef V8_TARGET_BIG_ENDIAN
1914   offset += (COMPRESS_POINTERS_BOOL ? kTaggedSize : 0);
1915 #endif
1916   CompareTagged(obj, MemOperand(kRootRegister, offset));
1917 }
1918 
1919 void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit,
1920                                      unsigned higher_limit,
1921                                      Label* on_in_range) {
1922   CompareRange(value, lower_limit, higher_limit);
1923   ble(on_in_range);
1924 }
1925 
1926 void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone,
1927                                        Register result,
1928                                        DoubleRegister double_input,
1929                                        StubCallMode stub_mode) {
1930   Label done;
1931 
1932   TryInlineTruncateDoubleToI(result, double_input, &done);
1933 
1934   // If we fell through then inline version didn't succeed - call stub instead.
1935   push(r14);
1936   // Put input on stack.
1937   lay(sp, MemOperand(sp, -kDoubleSize));
1938   StoreF64(double_input, MemOperand(sp));
1939 
1940 #if V8_ENABLE_WEBASSEMBLY
1941   if (stub_mode == StubCallMode::kCallWasmRuntimeStub) {
1942     Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
1943 #else
1944   // For balance.
1945   if (false) {
1946 #endif  // V8_ENABLE_WEBASSEMBLY
1947   } else {
1948     Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET);
1949   }
1950 
1951   LoadU64(result, MemOperand(sp, 0));
1952   la(sp, MemOperand(sp, kDoubleSize));
1953   pop(r14);
1954 
1955   bind(&done);
1956 }
1957 
1958 void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
1959                                                 DoubleRegister double_input,
1960                                                 Label* done) {
1961   ConvertDoubleToInt64(result, double_input);
1962 
1963   // Test for overflow
1964   TestIfInt32(result);
1965   beq(done);
1966 }
1967 
1968 void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments,
1969                                  SaveFPRegsMode save_doubles) {
1970   // All parameters are on the stack.  r2 has the return value after call.
1971 
1972   // If the expected number of arguments of the runtime function is
1973   // constant, we check that the actual number of arguments match the
1974   // expectation.
1975   CHECK(f->nargs < 0 || f->nargs == num_arguments);
1976 
1977   // TODO(1236192): Most runtime routines don't need the number of
1978   // arguments passed in because it is constant. At some point we
1979   // should remove this need and make the runtime routine entry code
1980   // smarter.
1981   mov(r2, Operand(num_arguments));
1982   Move(r3, ExternalReference::Create(f));
1983 #if V8_TARGET_ARCH_S390X
1984   Handle<Code> code =
1985       CodeFactory::CEntry(isolate(), f->result_size, save_doubles);
1986 #else
1987   Handle<Code> code = CodeFactory::CEntry(isolate(), 1, save_doubles);
1988 #endif
1989 
1990   Call(code, RelocInfo::CODE_TARGET);
1991 }
1992 
1993 void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) {
1994   const Runtime::Function* function = Runtime::FunctionForId(fid);
1995   DCHECK_EQ(1, function->result_size);
1996   if (function->nargs >= 0) {
1997     mov(r2, Operand(function->nargs));
1998   }
1999   JumpToExternalReference(ExternalReference::Create(fid));
2000 }
2001 
2002 void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin,
2003                                              bool builtin_exit_frame) {
2004   Move(r3, builtin);
2005   Handle<Code> code = CodeFactory::CEntry(isolate(), 1, SaveFPRegsMode::kIgnore,
2006                                           ArgvMode::kStack, builtin_exit_frame);
2007   Jump(code, RelocInfo::CODE_TARGET);
2008 }
2009 
2010 void MacroAssembler::JumpToOffHeapInstructionStream(Address entry) {
2011   mov(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
2012   Jump(kOffHeapTrampolineRegister);
2013 }
2014 
2015 void MacroAssembler::LoadWeakValue(Register out, Register in,
2016                                    Label* target_if_cleared) {
2017   CmpS32(in, Operand(kClearedWeakHeapObjectLower32));
2018   beq(target_if_cleared);
2019 
2020   AndP(out, in, Operand(~kWeakHeapObjectMask));
2021 }
2022 
2023 void MacroAssembler::EmitIncrementCounter(StatsCounter* counter, int value,
2024                                           Register scratch1,
2025                                           Register scratch2) {
2026   DCHECK(value > 0 && is_int8(value));
2027   if (FLAG_native_code_counters && counter->Enabled()) {
2028     Move(scratch2, ExternalReference::Create(counter));
2029     // @TODO(john.yan): can be optimized by asi()
2030     LoadS32(scratch1, MemOperand(scratch2));
2031     AddS64(scratch1, Operand(value));
2032     StoreU32(scratch1, MemOperand(scratch2));
2033   }
2034 }
2035 
2036 void MacroAssembler::EmitDecrementCounter(StatsCounter* counter, int value,
2037                                           Register scratch1,
2038                                           Register scratch2) {
2039   DCHECK(value > 0 && is_int8(value));
2040   if (FLAG_native_code_counters && counter->Enabled()) {
2041     Move(scratch2, ExternalReference::Create(counter));
2042     // @TODO(john.yan): can be optimized by asi()
2043     LoadS32(scratch1, MemOperand(scratch2));
2044     AddS64(scratch1, Operand(-value));
2045     StoreU32(scratch1, MemOperand(scratch2));
2046   }
2047 }
2048 
2049 void TurboAssembler::Assert(Condition cond, AbortReason reason, CRegister cr) {
2050   if (FLAG_debug_code) Check(cond, reason, cr);
2051 }
2052 
2053 void TurboAssembler::AssertUnreachable(AbortReason reason) {
2054   if (FLAG_debug_code) Abort(reason);
2055 }
2056 
2057 void TurboAssembler::Check(Condition cond, AbortReason reason, CRegister cr) {
2058   Label L;
2059   b(cond, &L);
2060   Abort(reason);
2061   // will not return here
2062   bind(&L);
2063 }
2064 
2065 void TurboAssembler::Abort(AbortReason reason) {
2066   Label abort_start;
2067   bind(&abort_start);
2068   if (FLAG_code_comments) {
2069     const char* msg = GetAbortReason(reason);
2070     RecordComment("Abort message: ");
2071     RecordComment(msg);
2072   }
2073 
2074   // Avoid emitting call to builtin if requested.
2075   if (trap_on_abort()) {
2076     stop();
2077     return;
2078   }
2079 
2080   if (should_abort_hard()) {
2081     // We don't care if we constructed a frame. Just pretend we did.
2082     FrameScope assume_frame(this, StackFrame::NO_FRAME_TYPE);
2083     lgfi(r2, Operand(static_cast<int>(reason)));
2084     PrepareCallCFunction(1, 0, r3);
2085     Move(r3, ExternalReference::abort_with_reason());
2086     // Use Call directly to avoid any unneeded overhead. The function won't
2087     // return anyway.
2088     Call(r3);
2089     return;
2090   }
2091 
2092   LoadSmiLiteral(r3, Smi::FromInt(static_cast<int>(reason)));
2093 
2094   // Disable stub call restrictions to always allow calls to abort.
2095   if (!has_frame_) {
2096     // We don't actually want to generate a pile of code for this, so just
2097     // claim there is a stack frame, without generating one.
2098     FrameScope scope(this, StackFrame::NO_FRAME_TYPE);
2099     Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
2100   } else {
2101     Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
2102   }
2103   // will not return here
2104 }
2105 
2106 void TurboAssembler::LoadMap(Register destination, Register object) {
2107   LoadTaggedPointerField(destination,
2108                          FieldMemOperand(object, HeapObject::kMapOffset));
2109 }
2110 
2111 void MacroAssembler::LoadNativeContextSlot(Register dst, int index) {
2112   LoadMap(dst, cp);
2113   LoadTaggedPointerField(
2114       dst, FieldMemOperand(
2115                dst, Map::kConstructorOrBackPointerOrNativeContextOffset));
2116   LoadTaggedPointerField(dst, MemOperand(dst, Context::SlotOffset(index)));
2117 }
2118 
2119 void TurboAssembler::AssertNotSmi(Register object) {
2120   if (FLAG_debug_code) {
2121     STATIC_ASSERT(kSmiTag == 0);
2122     TestIfSmi(object);
2123     Check(ne, AbortReason::kOperandIsASmi, cr0);
2124   }
2125 }
2126 
2127 void TurboAssembler::AssertSmi(Register object) {
2128   if (FLAG_debug_code) {
2129     STATIC_ASSERT(kSmiTag == 0);
2130     TestIfSmi(object);
2131     Check(eq, AbortReason::kOperandIsNotASmi, cr0);
2132   }
2133 }
2134 
2135 void MacroAssembler::AssertConstructor(Register object, Register scratch) {
2136   if (FLAG_debug_code) {
2137     STATIC_ASSERT(kSmiTag == 0);
2138     TestIfSmi(object);
2139     Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor);
2140     LoadMap(scratch, object);
2141     tm(FieldMemOperand(scratch, Map::kBitFieldOffset),
2142        Operand(Map::Bits1::IsConstructorBit::kMask));
2143     Check(ne, AbortReason::kOperandIsNotAConstructor);
2144   }
2145 }
2146 
2147 void MacroAssembler::AssertFunction(Register object) {
2148   if (FLAG_debug_code) {
2149     STATIC_ASSERT(kSmiTag == 0);
2150     TestIfSmi(object);
2151     Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, cr0);
2152     push(object);
2153     LoadMap(object, object);
2154     CompareInstanceTypeRange(object, object, FIRST_JS_FUNCTION_TYPE,
2155                              LAST_JS_FUNCTION_TYPE);
2156     pop(object);
2157     Check(le, AbortReason::kOperandIsNotAFunction);
2158   }
2159 }
2160 
2161 void MacroAssembler::AssertCallableFunction(Register object) {
2162   if (!FLAG_debug_code) return;
2163   ASM_CODE_COMMENT(this);
2164   STATIC_ASSERT(kSmiTag == 0);
2165   TestIfSmi(object);
2166   Check(ne, AbortReason::kOperandIsASmiAndNotAFunction);
2167   push(object);
2168   LoadMap(object, object);
2169   CompareInstanceTypeRange(object, object, FIRST_CALLABLE_JS_FUNCTION_TYPE,
2170                            LAST_CALLABLE_JS_FUNCTION_TYPE);
2171   pop(object);
2172   Check(le, AbortReason::kOperandIsNotACallableFunction);
2173 }
2174 
2175 void MacroAssembler::AssertBoundFunction(Register object) {
2176   if (FLAG_debug_code) {
2177     STATIC_ASSERT(kSmiTag == 0);
2178     TestIfSmi(object);
2179     Check(ne, AbortReason::kOperandIsASmiAndNotABoundFunction, cr0);
2180     push(object);
2181     CompareObjectType(object, object, object, JS_BOUND_FUNCTION_TYPE);
2182     pop(object);
2183     Check(eq, AbortReason::kOperandIsNotABoundFunction);
2184   }
2185 }
2186 
2187 void MacroAssembler::AssertGeneratorObject(Register object) {
2188   if (!FLAG_debug_code) return;
2189   TestIfSmi(object);
2190   Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, cr0);
2191 
2192   // Load map
2193   Register map = object;
2194   push(object);
2195   LoadMap(map, object);
2196 
2197   // Check if JSGeneratorObject
2198   Label do_check;
2199   Register instance_type = object;
2200   CompareInstanceType(map, instance_type, JS_GENERATOR_OBJECT_TYPE);
2201   beq(&do_check);
2202 
2203   // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType)
2204   CmpS64(instance_type, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE));
2205   beq(&do_check);
2206 
2207   // Check if JSAsyncGeneratorObject (See MacroAssembler::CompareInstanceType)
2208   CmpS64(instance_type, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE));
2209 
2210   bind(&do_check);
2211   // Restore generator object to register and perform assertion
2212   pop(object);
2213   Check(eq, AbortReason::kOperandIsNotAGeneratorObject);
2214 }
2215 
2216 void MacroAssembler::AssertUndefinedOrAllocationSite(Register object,
2217                                                      Register scratch) {
2218   if (FLAG_debug_code) {
2219     Label done_checking;
2220     AssertNotSmi(object);
2221     CompareRoot(object, RootIndex::kUndefinedValue);
2222     beq(&done_checking, Label::kNear);
2223     LoadMap(scratch, object);
2224     CompareInstanceType(scratch, scratch, ALLOCATION_SITE_TYPE);
2225     Assert(eq, AbortReason::kExpectedUndefinedOrCell);
2226     bind(&done_checking);
2227   }
2228 }
2229 
2230 static const int kRegisterPassedArguments = 5;
2231 
2232 int TurboAssembler::CalculateStackPassedWords(int num_reg_arguments,
2233                                               int num_double_arguments) {
2234   int stack_passed_words = 0;
2235   if (num_double_arguments > DoubleRegister::kNumRegisters) {
2236     stack_passed_words +=
2237         2 * (num_double_arguments - DoubleRegister::kNumRegisters);
2238   }
2239   // Up to five simple arguments are passed in registers r2..r6
2240   if (num_reg_arguments > kRegisterPassedArguments) {
2241     stack_passed_words += num_reg_arguments - kRegisterPassedArguments;
2242   }
2243   return stack_passed_words;
2244 }
2245 
2246 void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
2247                                           int num_double_arguments,
2248                                           Register scratch) {
2249   int frame_alignment = ActivationFrameAlignment();
2250   int stack_passed_arguments =
2251       CalculateStackPassedWords(num_reg_arguments, num_double_arguments);
2252   int stack_space = kNumRequiredStackFrameSlots;
2253   if (frame_alignment > kSystemPointerSize) {
2254     // Make stack end at alignment and make room for stack arguments
2255     // -- preserving original value of sp.
2256     mov(scratch, sp);
2257     lay(sp, MemOperand(sp, -(stack_passed_arguments + 1) * kSystemPointerSize));
2258     DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
2259     ClearRightImm(sp, sp,
2260                   Operand(base::bits::WhichPowerOfTwo(frame_alignment)));
2261     StoreU64(scratch,
2262              MemOperand(sp, (stack_passed_arguments)*kSystemPointerSize));
2263   } else {
2264     stack_space += stack_passed_arguments;
2265   }
2266   lay(sp, MemOperand(sp, (-stack_space) * kSystemPointerSize));
2267 }
2268 
2269 void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
2270                                           Register scratch) {
2271   PrepareCallCFunction(num_reg_arguments, 0, scratch);
2272 }
2273 
2274 void TurboAssembler::MovToFloatParameter(DoubleRegister src) { Move(d0, src); }
2275 
2276 void TurboAssembler::MovToFloatResult(DoubleRegister src) { Move(d0, src); }
2277 
2278 void TurboAssembler::MovToFloatParameters(DoubleRegister src1,
2279                                           DoubleRegister src2) {
2280   if (src2 == d0) {
2281     DCHECK(src1 != d2);
2282     Move(d2, src2);
2283     Move(d0, src1);
2284   } else {
2285     Move(d0, src1);
2286     Move(d2, src2);
2287   }
2288 }
2289 
2290 void TurboAssembler::CallCFunction(ExternalReference function,
2291                                    int num_reg_arguments,
2292                                    int num_double_arguments) {
2293   Move(ip, function);
2294   CallCFunctionHelper(ip, num_reg_arguments, num_double_arguments);
2295 }
2296 
2297 void TurboAssembler::CallCFunction(Register function, int num_reg_arguments,
2298                                    int num_double_arguments) {
2299   CallCFunctionHelper(function, num_reg_arguments, num_double_arguments);
2300 }
2301 
2302 void TurboAssembler::CallCFunction(ExternalReference function,
2303                                    int num_arguments) {
2304   CallCFunction(function, num_arguments, 0);
2305 }
2306 
2307 void TurboAssembler::CallCFunction(Register function, int num_arguments) {
2308   CallCFunction(function, num_arguments, 0);
2309 }
2310 
2311 void TurboAssembler::CallCFunctionHelper(Register function,
2312                                          int num_reg_arguments,
2313                                          int num_double_arguments) {
2314   DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters);
2315   DCHECK(has_frame());
2316 
2317   // Save the frame pointer and PC so that the stack layout remains iterable,
2318   // even without an ExitFrame which normally exists between JS and C frames.
2319   Register addr_scratch = r1;
2320   // See x64 code for reasoning about how to address the isolate data fields.
2321   if (root_array_available()) {
2322     LoadPC(r0);
2323     StoreU64(r0, MemOperand(kRootRegister,
2324                             IsolateData::fast_c_call_caller_pc_offset()));
2325     StoreU64(fp, MemOperand(kRootRegister,
2326                             IsolateData::fast_c_call_caller_fp_offset()));
2327   } else {
2328     DCHECK_NOT_NULL(isolate());
2329 
2330     Move(addr_scratch,
2331          ExternalReference::fast_c_call_caller_pc_address(isolate()));
2332     LoadPC(r0);
2333     StoreU64(r0, MemOperand(addr_scratch));
2334     Move(addr_scratch,
2335          ExternalReference::fast_c_call_caller_fp_address(isolate()));
2336     StoreU64(fp, MemOperand(addr_scratch));
2337   }
2338 
2339   // Just call directly. The function called cannot cause a GC, or
2340   // allow preemption, so the return address in the link register
2341   // stays correct.
2342   Register dest = function;
2343   if (ABI_CALL_VIA_IP) {
2344     Move(ip, function);
2345     dest = ip;
2346   }
2347 
2348   Call(dest);
2349 
2350   // We don't unset the PC; the FP is the source of truth.
2351   Register zero_scratch = r0;
2352   lghi(zero_scratch, Operand::Zero());
2353 
2354   if (root_array_available()) {
2355     StoreU64(
2356         zero_scratch,
2357         MemOperand(kRootRegister, IsolateData::fast_c_call_caller_fp_offset()));
2358   } else {
2359     DCHECK_NOT_NULL(isolate());
2360     Move(addr_scratch,
2361          ExternalReference::fast_c_call_caller_fp_address(isolate()));
2362     StoreU64(zero_scratch, MemOperand(addr_scratch));
2363   }
2364 
2365   int stack_passed_arguments =
2366       CalculateStackPassedWords(num_reg_arguments, num_double_arguments);
2367   int stack_space = kNumRequiredStackFrameSlots + stack_passed_arguments;
2368   if (ActivationFrameAlignment() > kSystemPointerSize) {
2369     // Load the original stack pointer (pre-alignment) from the stack
2370     LoadU64(sp, MemOperand(sp, stack_space * kSystemPointerSize));
2371   } else {
2372     la(sp, MemOperand(sp, stack_space * kSystemPointerSize));
2373   }
2374 }
2375 
2376 void TurboAssembler::CheckPageFlag(
2377     Register object,
2378     Register scratch,  // scratch may be same register as object
2379     int mask, Condition cc, Label* condition_met) {
2380   DCHECK(cc == ne || cc == eq);
2381   ClearRightImm(scratch, object, Operand(kPageSizeBits));
2382 
2383   if (base::bits::IsPowerOfTwo(mask)) {
2384     // If it's a power of two, we can use Test-Under-Mask Memory-Imm form
2385     // which allows testing of a single byte in memory.
2386     int32_t byte_offset = 4;
2387     uint32_t shifted_mask = mask;
2388     // Determine the byte offset to be tested
2389     if (mask <= 0x80) {
2390       byte_offset = kSystemPointerSize - 1;
2391     } else if (mask < 0x8000) {
2392       byte_offset = kSystemPointerSize - 2;
2393       shifted_mask = mask >> 8;
2394     } else if (mask < 0x800000) {
2395       byte_offset = kSystemPointerSize - 3;
2396       shifted_mask = mask >> 16;
2397     } else {
2398       byte_offset = kSystemPointerSize - 4;
2399       shifted_mask = mask >> 24;
2400     }
2401 #if V8_TARGET_LITTLE_ENDIAN
2402     // Reverse the byte_offset if emulating on little endian platform
2403     byte_offset = kSystemPointerSize - byte_offset - 1;
2404 #endif
2405     tm(MemOperand(scratch, BasicMemoryChunk::kFlagsOffset + byte_offset),
2406        Operand(shifted_mask));
2407   } else {
2408     LoadU64(scratch, MemOperand(scratch, BasicMemoryChunk::kFlagsOffset));
2409     AndP(r0, scratch, Operand(mask));
2410   }
2411   // Should be okay to remove rc
2412 
2413   if (cc == ne) {
2414     bne(condition_met);
2415   }
2416   if (cc == eq) {
2417     beq(condition_met);
2418   }
2419 }
2420 
2421 Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
2422                                    Register reg4, Register reg5,
2423                                    Register reg6) {
2424   RegList regs = {reg1, reg2, reg3, reg4, reg5, reg6};
2425 
2426   const RegisterConfiguration* config = RegisterConfiguration::Default();
2427   for (int i = 0; i < config->num_allocatable_general_registers(); ++i) {
2428     int code = config->GetAllocatableGeneralCode(i);
2429     Register candidate = Register::from_code(code);
2430     if (regs.has(candidate)) continue;
2431     return candidate;
2432   }
2433   UNREACHABLE();
2434 }
2435 
2436 void TurboAssembler::mov(Register dst, Register src) { lgr(dst, src); }
2437 
2438 void TurboAssembler::mov(Register dst, const Operand& src) {
2439   int64_t value = 0;
2440 
2441   if (src.is_heap_object_request()) {
2442     RequestHeapObject(src.heap_object_request());
2443   } else {
2444     value = src.immediate();
2445   }
2446 
2447   if (src.rmode() != RelocInfo::NO_INFO) {
2448     // some form of relocation needed
2449     RecordRelocInfo(src.rmode(), value);
2450   }
2451 
2452   int32_t hi_32 = static_cast<int32_t>(value >> 32);
2453   int32_t lo_32 = static_cast<int32_t>(value);
2454 
2455   if (src.rmode() == RelocInfo::NO_INFO) {
2456     if (hi_32 == 0) {
2457       if (is_uint16(lo_32)) {
2458         llill(dst, Operand(lo_32));
2459         return;
2460       }
2461       llilf(dst, Operand(lo_32));
2462       return;
2463     } else if (lo_32 == 0) {
2464       if (is_uint16(hi_32)) {
2465         llihl(dst, Operand(hi_32));
2466         return;
2467       }
2468       llihf(dst, Operand(hi_32));
2469       return;
2470     } else if (is_int16(value)) {
2471       lghi(dst, Operand(value));
2472       return;
2473     } else if (is_int32(value)) {
2474       lgfi(dst, Operand(value));
2475       return;
2476     }
2477   }
2478 
2479   iihf(dst, Operand(hi_32));
2480   iilf(dst, Operand(lo_32));
2481 }
2482 
2483 void TurboAssembler::MulS32(Register dst, const MemOperand& src1) {
2484   if (is_uint12(src1.offset())) {
2485     ms(dst, src1);
2486   } else if (is_int20(src1.offset())) {
2487     msy(dst, src1);
2488   } else {
2489     UNIMPLEMENTED();
2490   }
2491 }
2492 
2493 void TurboAssembler::MulS32(Register dst, Register src1) { msr(dst, src1); }
2494 
2495 void TurboAssembler::MulS32(Register dst, const Operand& src1) {
2496   msfi(dst, src1);
2497 }
2498 
2499 #define Generate_MulHigh32(instr) \
2500   {                               \
2501     lgfr(dst, src1);              \
2502     instr(dst, src2);             \
2503     srlg(dst, dst, Operand(32));  \
2504   }
2505 
2506 void TurboAssembler::MulHighS32(Register dst, Register src1,
2507                                 const MemOperand& src2) {
2508   Generate_MulHigh32(msgf);
2509 }
2510 
2511 void TurboAssembler::MulHighS32(Register dst, Register src1, Register src2) {
2512   if (dst == src2) {
2513     std::swap(src1, src2);
2514   }
2515   Generate_MulHigh32(msgfr);
2516 }
2517 
2518 void TurboAssembler::MulHighS32(Register dst, Register src1,
2519                                 const Operand& src2) {
2520   Generate_MulHigh32(msgfi);
2521 }
2522 
2523 #undef Generate_MulHigh32
2524 
2525 #define Generate_MulHighU32(instr) \
2526   {                                \
2527     lr(r1, src1);                  \
2528     instr(r0, src2);               \
2529     LoadU32(dst, r0);               \
2530   }
2531 
2532 void TurboAssembler::MulHighU32(Register dst, Register src1,
2533                                 const MemOperand& src2) {
2534   Generate_MulHighU32(ml);
2535 }
2536 
2537 void TurboAssembler::MulHighU32(Register dst, Register src1, Register src2) {
2538   Generate_MulHighU32(mlr);
2539 }
2540 
2541 void TurboAssembler::MulHighU32(Register dst, Register src1,
2542                                 const Operand& src2) {
2543   USE(dst);
2544   USE(src1);
2545   USE(src2);
2546   UNREACHABLE();
2547 }
2548 
2549 #undef Generate_MulHighU32
2550 
2551 #define Generate_Mul32WithOverflowIfCCUnequal(instr) \
2552   {                                                  \
2553     lgfr(dst, src1);                                 \
2554     instr(dst, src2);                                \
2555     cgfr(dst, dst);                                  \
2556   }
2557 
2558 void TurboAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
2559                                                   const MemOperand& src2) {
2560   Register result = dst;
2561   if (src2.rx() == dst || src2.rb() == dst) dst = r0;
2562   Generate_Mul32WithOverflowIfCCUnequal(msgf);
2563   if (result != dst) llgfr(result, dst);
2564 }
2565 
2566 void TurboAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
2567                                                   Register src2) {
2568   if (dst == src2) {
2569     std::swap(src1, src2);
2570   }
2571   Generate_Mul32WithOverflowIfCCUnequal(msgfr);
2572 }
2573 
2574 void TurboAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
2575                                                   const Operand& src2) {
2576   Generate_Mul32WithOverflowIfCCUnequal(msgfi);
2577 }
2578 
2579 #undef Generate_Mul32WithOverflowIfCCUnequal
2580 
2581 #define Generate_Div32(instr) \
2582   {                           \
2583     lgfr(r1, src1);           \
2584     instr(r0, src2);          \
2585     LoadU32(dst, r1);          \
2586   }
2587 
2588 void TurboAssembler::DivS32(Register dst, Register src1,
2589                             const MemOperand& src2) {
2590   Generate_Div32(dsgf);
2591 }
2592 
2593 void TurboAssembler::DivS32(Register dst, Register src1, Register src2) {
2594   Generate_Div32(dsgfr);
2595 }
2596 
2597 #undef Generate_Div32
2598 
2599 #define Generate_DivU32(instr) \
2600   {                            \
2601     lr(r0, src1);              \
2602     srdl(r0, Operand(32));     \
2603     instr(r0, src2);           \
2604     LoadU32(dst, r1);           \
2605   }
2606 
2607 void TurboAssembler::DivU32(Register dst, Register src1,
2608                             const MemOperand& src2) {
2609   Generate_DivU32(dl);
2610 }
2611 
2612 void TurboAssembler::DivU32(Register dst, Register src1, Register src2) {
2613   Generate_DivU32(dlr);
2614 }
2615 
2616 #undef Generate_DivU32
2617 
2618 #define Generate_Div64(instr) \
2619   {                           \
2620     lgr(r1, src1);            \
2621     instr(r0, src2);          \
2622     lgr(dst, r1);             \
2623   }
2624 
2625 void TurboAssembler::DivS64(Register dst, Register src1,
2626                             const MemOperand& src2) {
2627   Generate_Div64(dsg);
2628 }
2629 
2630 void TurboAssembler::DivS64(Register dst, Register src1, Register src2) {
2631   Generate_Div64(dsgr);
2632 }
2633 
2634 #undef Generate_Div64
2635 
2636 #define Generate_DivU64(instr) \
2637   {                            \
2638     lgr(r1, src1);             \
2639     lghi(r0, Operand::Zero()); \
2640     instr(r0, src2);           \
2641     lgr(dst, r1);              \
2642   }
2643 
2644 void TurboAssembler::DivU64(Register dst, Register src1,
2645                             const MemOperand& src2) {
2646   Generate_DivU64(dlg);
2647 }
2648 
2649 void TurboAssembler::DivU64(Register dst, Register src1, Register src2) {
2650   Generate_DivU64(dlgr);
2651 }
2652 
2653 #undef Generate_DivU64
2654 
2655 #define Generate_Mod32(instr) \
2656   {                           \
2657     lgfr(r1, src1);           \
2658     instr(r0, src2);          \
2659     LoadU32(dst, r0);          \
2660   }
2661 
2662 void TurboAssembler::ModS32(Register dst, Register src1,
2663                             const MemOperand& src2) {
2664   Generate_Mod32(dsgf);
2665 }
2666 
2667 void TurboAssembler::ModS32(Register dst, Register src1, Register src2) {
2668   Generate_Mod32(dsgfr);
2669 }
2670 
2671 #undef Generate_Mod32
2672 
2673 #define Generate_ModU32(instr) \
2674   {                            \
2675     lr(r0, src1);              \
2676     srdl(r0, Operand(32));     \
2677     instr(r0, src2);           \
2678     LoadU32(dst, r0);           \
2679   }
2680 
2681 void TurboAssembler::ModU32(Register dst, Register src1,
2682                             const MemOperand& src2) {
2683   Generate_ModU32(dl);
2684 }
2685 
2686 void TurboAssembler::ModU32(Register dst, Register src1, Register src2) {
2687   Generate_ModU32(dlr);
2688 }
2689 
2690 #undef Generate_ModU32
2691 
2692 #define Generate_Mod64(instr) \
2693   {                           \
2694     lgr(r1, src1);            \
2695     instr(r0, src2);          \
2696     lgr(dst, r0);             \
2697   }
2698 
2699 void TurboAssembler::ModS64(Register dst, Register src1,
2700                             const MemOperand& src2) {
2701   Generate_Mod64(dsg);
2702 }
2703 
2704 void TurboAssembler::ModS64(Register dst, Register src1, Register src2) {
2705   Generate_Mod64(dsgr);
2706 }
2707 
2708 #undef Generate_Mod64
2709 
2710 #define Generate_ModU64(instr) \
2711   {                            \
2712     lgr(r1, src1);             \
2713     lghi(r0, Operand::Zero()); \
2714     instr(r0, src2);           \
2715     lgr(dst, r0);              \
2716   }
2717 
2718 void TurboAssembler::ModU64(Register dst, Register src1,
2719                             const MemOperand& src2) {
2720   Generate_ModU64(dlg);
2721 }
2722 
2723 void TurboAssembler::ModU64(Register dst, Register src1, Register src2) {
2724   Generate_ModU64(dlgr);
2725 }
2726 
2727 #undef Generate_ModU64
2728 
2729 void TurboAssembler::MulS64(Register dst, const Operand& opnd) {
2730   msgfi(dst, opnd);
2731 }
2732 
2733 void TurboAssembler::MulS64(Register dst, Register src) { msgr(dst, src); }
2734 
2735 void TurboAssembler::MulS64(Register dst, const MemOperand& opnd) {
2736   msg(dst, opnd);
2737 }
2738 
2739 void TurboAssembler::Sqrt(DoubleRegister result, DoubleRegister input) {
2740   sqdbr(result, input);
2741 }
2742 void TurboAssembler::Sqrt(DoubleRegister result, const MemOperand& input) {
2743   if (is_uint12(input.offset())) {
2744     sqdb(result, input);
2745   } else {
2746     ldy(result, input);
2747     sqdbr(result, result);
2748   }
2749 }
2750 //----------------------------------------------------------------------------
2751 //  Add Instructions
2752 //----------------------------------------------------------------------------
2753 
2754 // Add 32-bit (Register dst = Register dst + Immediate opnd)
2755 void TurboAssembler::AddS32(Register dst, const Operand& opnd) {
2756   if (is_int16(opnd.immediate()))
2757     ahi(dst, opnd);
2758   else
2759     afi(dst, opnd);
2760 }
2761 
2762 // Add Pointer Size (Register dst = Register dst + Immediate opnd)
2763 void TurboAssembler::AddS64(Register dst, const Operand& opnd) {
2764   if (is_int16(opnd.immediate()))
2765     aghi(dst, opnd);
2766   else
2767     agfi(dst, opnd);
2768 }
2769 
2770 void TurboAssembler::AddS32(Register dst, Register src, int32_t opnd) {
2771   AddS32(dst, src, Operand(opnd));
2772 }
2773 
2774 // Add 32-bit (Register dst = Register src + Immediate opnd)
2775 void TurboAssembler::AddS32(Register dst, Register src, const Operand& opnd) {
2776   if (dst != src) {
2777     if (CpuFeatures::IsSupported(DISTINCT_OPS) && is_int16(opnd.immediate())) {
2778       ahik(dst, src, opnd);
2779       return;
2780     }
2781     lr(dst, src);
2782   }
2783   AddS32(dst, opnd);
2784 }
2785 
2786 void TurboAssembler::AddS64(Register dst, Register src, int32_t opnd) {
2787   AddS64(dst, src, Operand(opnd));
2788 }
2789 
2790 // Add Pointer Size (Register dst = Register src + Immediate opnd)
2791 void TurboAssembler::AddS64(Register dst, Register src, const Operand& opnd) {
2792   if (dst != src) {
2793     if (CpuFeatures::IsSupported(DISTINCT_OPS) && is_int16(opnd.immediate())) {
2794       aghik(dst, src, opnd);
2795       return;
2796     }
2797     mov(dst, src);
2798   }
2799   AddS64(dst, opnd);
2800 }
2801 
2802 // Add 32-bit (Register dst = Register dst + Register src)
2803 void TurboAssembler::AddS32(Register dst, Register src) { ar(dst, src); }
2804 
2805 // Add Pointer Size (Register dst = Register dst + Register src)
2806 void TurboAssembler::AddS64(Register dst, Register src) { agr(dst, src); }
2807 
2808 // Add 32-bit (Register dst = Register src1 + Register src2)
2809 void TurboAssembler::AddS32(Register dst, Register src1, Register src2) {
2810   if (dst != src1 && dst != src2) {
2811     // We prefer to generate AR/AGR, over the non clobbering ARK/AGRK
2812     // as AR is a smaller instruction
2813     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
2814       ark(dst, src1, src2);
2815       return;
2816     } else {
2817       lr(dst, src1);
2818     }
2819   } else if (dst == src2) {
2820     src2 = src1;
2821   }
2822   ar(dst, src2);
2823 }
2824 
2825 // Add Pointer Size (Register dst = Register src1 + Register src2)
2826 void TurboAssembler::AddS64(Register dst, Register src1, Register src2) {
2827   if (dst != src1 && dst != src2) {
2828     // We prefer to generate AR/AGR, over the non clobbering ARK/AGRK
2829     // as AR is a smaller instruction
2830     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
2831       agrk(dst, src1, src2);
2832       return;
2833     } else {
2834       mov(dst, src1);
2835     }
2836   } else if (dst == src2) {
2837     src2 = src1;
2838   }
2839   agr(dst, src2);
2840 }
2841 
2842 // Add 32-bit (Register-Memory)
2843 void TurboAssembler::AddS32(Register dst, const MemOperand& opnd) {
2844   DCHECK(is_int20(opnd.offset()));
2845   if (is_uint12(opnd.offset()))
2846     a(dst, opnd);
2847   else
2848     ay(dst, opnd);
2849 }
2850 
2851 // Add Pointer Size (Register-Memory)
2852 void TurboAssembler::AddS64(Register dst, const MemOperand& opnd) {
2853   DCHECK(is_int20(opnd.offset()));
2854   ag(dst, opnd);
2855 }
2856 
2857 // Add 32-bit (Memory - Immediate)
2858 void TurboAssembler::AddS32(const MemOperand& opnd, const Operand& imm) {
2859   DCHECK(is_int8(imm.immediate()));
2860   DCHECK(is_int20(opnd.offset()));
2861   DCHECK(CpuFeatures::IsSupported(GENERAL_INSTR_EXT));
2862   asi(opnd, imm);
2863 }
2864 
2865 // Add Pointer-sized (Memory - Immediate)
2866 void TurboAssembler::AddS64(const MemOperand& opnd, const Operand& imm) {
2867   DCHECK(is_int8(imm.immediate()));
2868   DCHECK(is_int20(opnd.offset()));
2869   DCHECK(CpuFeatures::IsSupported(GENERAL_INSTR_EXT));
2870   agsi(opnd, imm);
2871 }
2872 
2873 //----------------------------------------------------------------------------
2874 //  Add Logical Instructions
2875 //----------------------------------------------------------------------------
2876 
2877 // Add Logical 32-bit (Register dst = Register src1 + Register src2)
2878 void TurboAssembler::AddU32(Register dst, Register src1, Register src2) {
2879   if (dst != src2 && dst != src1) {
2880     lr(dst, src1);
2881     alr(dst, src2);
2882   } else if (dst != src2) {
2883     // dst == src1
2884     DCHECK(dst == src1);
2885     alr(dst, src2);
2886   } else {
2887     // dst == src2
2888     DCHECK(dst == src2);
2889     alr(dst, src1);
2890   }
2891 }
2892 
2893 // Add Logical 32-bit (Register dst = Register dst + Immediate opnd)
2894 void TurboAssembler::AddU32(Register dst, const Operand& imm) {
2895   alfi(dst, imm);
2896 }
2897 
2898 // Add Logical Pointer Size (Register dst = Register dst + Immediate opnd)
2899 void TurboAssembler::AddU64(Register dst, const Operand& imm) {
2900   algfi(dst, imm);
2901 }
2902 
2903 void TurboAssembler::AddU64(Register dst, Register src1, Register src2) {
2904   if (dst != src2 && dst != src1) {
2905     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
2906       algrk(dst, src1, src2);
2907     } else {
2908       lgr(dst, src1);
2909       algr(dst, src2);
2910     }
2911   } else if (dst != src2) {
2912     // dst == src1
2913     DCHECK(dst == src1);
2914     algr(dst, src2);
2915   } else {
2916     // dst == src2
2917     DCHECK(dst == src2);
2918     algr(dst, src1);
2919   }
2920 }
2921 
2922 // Add Logical 32-bit (Register-Memory)
2923 void TurboAssembler::AddU32(Register dst, const MemOperand& opnd) {
2924   DCHECK(is_int20(opnd.offset()));
2925   if (is_uint12(opnd.offset()))
2926     al_z(dst, opnd);
2927   else
2928     aly(dst, opnd);
2929 }
2930 
2931 // Add Logical Pointer Size (Register-Memory)
2932 void TurboAssembler::AddU64(Register dst, const MemOperand& opnd) {
2933   DCHECK(is_int20(opnd.offset()));
2934   alg(dst, opnd);
2935 }
2936 
2937 //----------------------------------------------------------------------------
2938 //  Subtract Instructions
2939 //----------------------------------------------------------------------------
2940 
2941 // Subtract Logical 32-bit (Register dst = Register src1 - Register src2)
2942 void TurboAssembler::SubU32(Register dst, Register src1, Register src2) {
2943   if (dst != src2 && dst != src1) {
2944     lr(dst, src1);
2945     slr(dst, src2);
2946   } else if (dst != src2) {
2947     // dst == src1
2948     DCHECK(dst == src1);
2949     slr(dst, src2);
2950   } else {
2951     // dst == src2
2952     DCHECK(dst == src2);
2953     lr(r0, dst);
2954     SubU32(dst, src1, r0);
2955   }
2956 }
2957 
2958 // Subtract 32-bit (Register dst = Register dst - Immediate opnd)
2959 void TurboAssembler::SubS32(Register dst, const Operand& imm) {
2960   AddS32(dst, Operand(-(imm.immediate())));
2961 }
2962 
2963 // Subtract Pointer Size (Register dst = Register dst - Immediate opnd)
2964 void TurboAssembler::SubS64(Register dst, const Operand& imm) {
2965   AddS64(dst, Operand(-(imm.immediate())));
2966 }
2967 
2968 void TurboAssembler::SubS32(Register dst, Register src, int32_t imm) {
2969   SubS32(dst, src, Operand(imm));
2970 }
2971 
2972 // Subtract 32-bit (Register dst = Register src - Immediate opnd)
2973 void TurboAssembler::SubS32(Register dst, Register src, const Operand& imm) {
2974   AddS32(dst, src, Operand(-(imm.immediate())));
2975 }
2976 
2977 void TurboAssembler::SubS64(Register dst, Register src, int32_t imm) {
2978   SubS64(dst, src, Operand(imm));
2979 }
2980 
2981 // Subtract Pointer Sized (Register dst = Register src - Immediate opnd)
2982 void TurboAssembler::SubS64(Register dst, Register src, const Operand& imm) {
2983   AddS64(dst, src, Operand(-(imm.immediate())));
2984 }
2985 
2986 // Subtract 32-bit (Register dst = Register dst - Register src)
2987 void TurboAssembler::SubS32(Register dst, Register src) { sr(dst, src); }
2988 
2989 // Subtract Pointer Size (Register dst = Register dst - Register src)
2990 void TurboAssembler::SubS64(Register dst, Register src) { sgr(dst, src); }
2991 
2992 // Subtract 32-bit (Register = Register - Register)
2993 void TurboAssembler::SubS32(Register dst, Register src1, Register src2) {
2994   // Use non-clobbering version if possible
2995   if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
2996     srk(dst, src1, src2);
2997     return;
2998   }
2999   if (dst != src1 && dst != src2) lr(dst, src1);
3000   // In scenario where we have dst = src - dst, we need to swap and negate
3001   if (dst != src1 && dst == src2) {
3002     Label done;
3003     lcr(dst, dst);  // dst = -dst
3004     b(overflow, &done);
3005     ar(dst, src1);  // dst = dst + src
3006     bind(&done);
3007   } else {
3008     sr(dst, src2);
3009   }
3010 }
3011 
3012 // Subtract Pointer Sized (Register = Register - Register)
3013 void TurboAssembler::SubS64(Register dst, Register src1, Register src2) {
3014   // Use non-clobbering version if possible
3015   if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3016     sgrk(dst, src1, src2);
3017     return;
3018   }
3019   if (dst != src1 && dst != src2) mov(dst, src1);
3020   // In scenario where we have dst = src - dst, we need to swap and negate
3021   if (dst != src1 && dst == src2) {
3022     Label done;
3023     lcgr(dst, dst);  // dst = -dst
3024     b(overflow, &done);
3025     AddS64(dst, src1);  // dst = dst + src
3026     bind(&done);
3027   } else {
3028     SubS64(dst, src2);
3029   }
3030 }
3031 
3032 // Subtract 32-bit (Register-Memory)
3033 void TurboAssembler::SubS32(Register dst, const MemOperand& opnd) {
3034   DCHECK(is_int20(opnd.offset()));
3035   if (is_uint12(opnd.offset()))
3036     s(dst, opnd);
3037   else
3038     sy(dst, opnd);
3039 }
3040 
3041 // Subtract Pointer Sized (Register - Memory)
3042 void TurboAssembler::SubS64(Register dst, const MemOperand& opnd) {
3043 #if V8_TARGET_ARCH_S390X
3044   sg(dst, opnd);
3045 #else
3046   SubS32(dst, opnd);
3047 #endif
3048 }
3049 
3050 void TurboAssembler::MovIntToFloat(DoubleRegister dst, Register src) {
3051   sllg(r0, src, Operand(32));
3052   ldgr(dst, r0);
3053 }
3054 
3055 void TurboAssembler::MovFloatToInt(Register dst, DoubleRegister src) {
3056   lgdr(dst, src);
3057   srlg(dst, dst, Operand(32));
3058 }
3059 
3060 // Load And Subtract 32-bit (similar to laa/lan/lao/lax)
3061 void TurboAssembler::LoadAndSub32(Register dst, Register src,
3062                                   const MemOperand& opnd) {
3063   lcr(dst, src);
3064   laa(dst, dst, opnd);
3065 }
3066 
3067 void TurboAssembler::LoadAndSub64(Register dst, Register src,
3068                                   const MemOperand& opnd) {
3069   lcgr(dst, src);
3070   laag(dst, dst, opnd);
3071 }
3072 
3073 //----------------------------------------------------------------------------
3074 //  Subtract Logical Instructions
3075 //----------------------------------------------------------------------------
3076 
3077 // Subtract Logical 32-bit (Register - Memory)
3078 void TurboAssembler::SubU32(Register dst, const MemOperand& opnd) {
3079   DCHECK(is_int20(opnd.offset()));
3080   if (is_uint12(opnd.offset()))
3081     sl(dst, opnd);
3082   else
3083     sly(dst, opnd);
3084 }
3085 
3086 // Subtract Logical Pointer Sized (Register - Memory)
3087 void TurboAssembler::SubU64(Register dst, const MemOperand& opnd) {
3088   DCHECK(is_int20(opnd.offset()));
3089 #if V8_TARGET_ARCH_S390X
3090   slgf(dst, opnd);
3091 #else
3092   SubU32(dst, opnd);
3093 #endif
3094 }
3095 
3096 //----------------------------------------------------------------------------
3097 //  Bitwise Operations
3098 //----------------------------------------------------------------------------
3099 
3100 // AND 32-bit - dst = dst & src
3101 void TurboAssembler::And(Register dst, Register src) { nr(dst, src); }
3102 
3103 // AND Pointer Size - dst = dst & src
3104 void TurboAssembler::AndP(Register dst, Register src) { ngr(dst, src); }
3105 
3106 // Non-clobbering AND 32-bit - dst = src1 & src1
3107 void TurboAssembler::And(Register dst, Register src1, Register src2) {
3108   if (dst != src1 && dst != src2) {
3109     // We prefer to generate XR/XGR, over the non clobbering XRK/XRK
3110     // as XR is a smaller instruction
3111     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3112       nrk(dst, src1, src2);
3113       return;
3114     } else {
3115       lr(dst, src1);
3116     }
3117   } else if (dst == src2) {
3118     src2 = src1;
3119   }
3120   And(dst, src2);
3121 }
3122 
3123 // Non-clobbering AND pointer size - dst = src1 & src1
3124 void TurboAssembler::AndP(Register dst, Register src1, Register src2) {
3125   if (dst != src1 && dst != src2) {
3126     // We prefer to generate XR/XGR, over the non clobbering XRK/XRK
3127     // as XR is a smaller instruction
3128     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3129       ngrk(dst, src1, src2);
3130       return;
3131     } else {
3132       mov(dst, src1);
3133     }
3134   } else if (dst == src2) {
3135     src2 = src1;
3136   }
3137   AndP(dst, src2);
3138 }
3139 
3140 // AND 32-bit (Reg - Mem)
3141 void TurboAssembler::And(Register dst, const MemOperand& opnd) {
3142   DCHECK(is_int20(opnd.offset()));
3143   if (is_uint12(opnd.offset()))
3144     n(dst, opnd);
3145   else
3146     ny(dst, opnd);
3147 }
3148 
3149 // AND Pointer Size (Reg - Mem)
3150 void TurboAssembler::AndP(Register dst, const MemOperand& opnd) {
3151   DCHECK(is_int20(opnd.offset()));
3152 #if V8_TARGET_ARCH_S390X
3153   ng(dst, opnd);
3154 #else
3155   And(dst, opnd);
3156 #endif
3157 }
3158 
3159 // AND 32-bit - dst = dst & imm
3160 void TurboAssembler::And(Register dst, const Operand& opnd) { nilf(dst, opnd); }
3161 
3162 // AND Pointer Size - dst = dst & imm
3163 void TurboAssembler::AndP(Register dst, const Operand& opnd) {
3164 #if V8_TARGET_ARCH_S390X
3165   intptr_t value = opnd.immediate();
3166   if (value >> 32 != -1) {
3167     // this may not work b/c condition code won't be set correctly
3168     nihf(dst, Operand(value >> 32));
3169   }
3170   nilf(dst, Operand(value & 0xFFFFFFFF));
3171 #else
3172   And(dst, opnd);
3173 #endif
3174 }
3175 
3176 // AND 32-bit - dst = src & imm
3177 void TurboAssembler::And(Register dst, Register src, const Operand& opnd) {
3178   if (dst != src) lr(dst, src);
3179   nilf(dst, opnd);
3180 }
3181 
3182 // AND Pointer Size - dst = src & imm
3183 void TurboAssembler::AndP(Register dst, Register src, const Operand& opnd) {
3184   // Try to exploit RISBG first
3185   intptr_t value = opnd.immediate();
3186   if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
3187     intptr_t shifted_value = value;
3188     int trailing_zeros = 0;
3189 
3190     // We start checking how many trailing zeros are left at the end.
3191     while ((0 != shifted_value) && (0 == (shifted_value & 1))) {
3192       trailing_zeros++;
3193       shifted_value >>= 1;
3194     }
3195 
3196     // If temp (value with right-most set of zeros shifted out) is 1 less
3197     // than power of 2, we have consecutive bits of 1.
3198     // Special case: If shift_value is zero, we cannot use RISBG, as it requires
3199     //               selection of at least 1 bit.
3200     if ((0 != shifted_value) && base::bits::IsPowerOfTwo(shifted_value + 1)) {
3201       int startBit =
3202           base::bits::CountLeadingZeros64(shifted_value) - trailing_zeros;
3203       int endBit = 63 - trailing_zeros;
3204       // Start: startBit, End: endBit, Shift = 0, true = zero unselected bits.
3205       RotateInsertSelectBits(dst, src, Operand(startBit), Operand(endBit),
3206                              Operand::Zero(), true);
3207       return;
3208     } else if (-1 == shifted_value) {
3209       // A Special case in which all top bits up to MSB are 1's.  In this case,
3210       // we can set startBit to be 0.
3211       int endBit = 63 - trailing_zeros;
3212       RotateInsertSelectBits(dst, src, Operand::Zero(), Operand(endBit),
3213                              Operand::Zero(), true);
3214       return;
3215     }
3216   }
3217 
3218   // If we are &'ing zero, we can just whack the dst register and skip copy
3219   if (dst != src && (0 != value)) mov(dst, src);
3220   AndP(dst, opnd);
3221 }
3222 
3223 // OR 32-bit - dst = dst & src
3224 void TurboAssembler::Or(Register dst, Register src) { or_z(dst, src); }
3225 
3226 // OR Pointer Size - dst = dst & src
3227 void TurboAssembler::OrP(Register dst, Register src) { ogr(dst, src); }
3228 
3229 // Non-clobbering OR 32-bit - dst = src1 & src1
3230 void TurboAssembler::Or(Register dst, Register src1, Register src2) {
3231   if (dst != src1 && dst != src2) {
3232     // We prefer to generate XR/XGR, over the non clobbering XRK/XRK
3233     // as XR is a smaller instruction
3234     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3235       ork(dst, src1, src2);
3236       return;
3237     } else {
3238       lr(dst, src1);
3239     }
3240   } else if (dst == src2) {
3241     src2 = src1;
3242   }
3243   Or(dst, src2);
3244 }
3245 
3246 // Non-clobbering OR pointer size - dst = src1 & src1
3247 void TurboAssembler::OrP(Register dst, Register src1, Register src2) {
3248   if (dst != src1 && dst != src2) {
3249     // We prefer to generate XR/XGR, over the non clobbering XRK/XRK
3250     // as XR is a smaller instruction
3251     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3252       ogrk(dst, src1, src2);
3253       return;
3254     } else {
3255       mov(dst, src1);
3256     }
3257   } else if (dst == src2) {
3258     src2 = src1;
3259   }
3260   OrP(dst, src2);
3261 }
3262 
3263 // OR 32-bit (Reg - Mem)
3264 void TurboAssembler::Or(Register dst, const MemOperand& opnd) {
3265   DCHECK(is_int20(opnd.offset()));
3266   if (is_uint12(opnd.offset()))
3267     o(dst, opnd);
3268   else
3269     oy(dst, opnd);
3270 }
3271 
3272 // OR Pointer Size (Reg - Mem)
3273 void TurboAssembler::OrP(Register dst, const MemOperand& opnd) {
3274   DCHECK(is_int20(opnd.offset()));
3275 #if V8_TARGET_ARCH_S390X
3276   og(dst, opnd);
3277 #else
3278   Or(dst, opnd);
3279 #endif
3280 }
3281 
3282 // OR 32-bit - dst = dst & imm
3283 void TurboAssembler::Or(Register dst, const Operand& opnd) { oilf(dst, opnd); }
3284 
3285 // OR Pointer Size - dst = dst & imm
3286 void TurboAssembler::OrP(Register dst, const Operand& opnd) {
3287 #if V8_TARGET_ARCH_S390X
3288   intptr_t value = opnd.immediate();
3289   if (value >> 32 != 0) {
3290     // this may not work b/c condition code won't be set correctly
3291     oihf(dst, Operand(value >> 32));
3292   }
3293   oilf(dst, Operand(value & 0xFFFFFFFF));
3294 #else
3295   Or(dst, opnd);
3296 #endif
3297 }
3298 
3299 // OR 32-bit - dst = src & imm
3300 void TurboAssembler::Or(Register dst, Register src, const Operand& opnd) {
3301   if (dst != src) lr(dst, src);
3302   oilf(dst, opnd);
3303 }
3304 
3305 // OR Pointer Size - dst = src & imm
3306 void TurboAssembler::OrP(Register dst, Register src, const Operand& opnd) {
3307   if (dst != src) mov(dst, src);
3308   OrP(dst, opnd);
3309 }
3310 
3311 // XOR 32-bit - dst = dst & src
3312 void TurboAssembler::Xor(Register dst, Register src) { xr(dst, src); }
3313 
3314 // XOR Pointer Size - dst = dst & src
3315 void TurboAssembler::XorP(Register dst, Register src) { xgr(dst, src); }
3316 
3317 // Non-clobbering XOR 32-bit - dst = src1 & src1
3318 void TurboAssembler::Xor(Register dst, Register src1, Register src2) {
3319   if (dst != src1 && dst != src2) {
3320     // We prefer to generate XR/XGR, over the non clobbering XRK/XRK
3321     // as XR is a smaller instruction
3322     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3323       xrk(dst, src1, src2);
3324       return;
3325     } else {
3326       lr(dst, src1);
3327     }
3328   } else if (dst == src2) {
3329     src2 = src1;
3330   }
3331   Xor(dst, src2);
3332 }
3333 
3334 // Non-clobbering XOR pointer size - dst = src1 & src1
3335 void TurboAssembler::XorP(Register dst, Register src1, Register src2) {
3336   if (dst != src1 && dst != src2) {
3337     // We prefer to generate XR/XGR, over the non clobbering XRK/XRK
3338     // as XR is a smaller instruction
3339     if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3340       xgrk(dst, src1, src2);
3341       return;
3342     } else {
3343       mov(dst, src1);
3344     }
3345   } else if (dst == src2) {
3346     src2 = src1;
3347   }
3348   XorP(dst, src2);
3349 }
3350 
3351 // XOR 32-bit (Reg - Mem)
3352 void TurboAssembler::Xor(Register dst, const MemOperand& opnd) {
3353   DCHECK(is_int20(opnd.offset()));
3354   if (is_uint12(opnd.offset()))
3355     x(dst, opnd);
3356   else
3357     xy(dst, opnd);
3358 }
3359 
3360 // XOR Pointer Size (Reg - Mem)
3361 void TurboAssembler::XorP(Register dst, const MemOperand& opnd) {
3362   DCHECK(is_int20(opnd.offset()));
3363 #if V8_TARGET_ARCH_S390X
3364   xg(dst, opnd);
3365 #else
3366   Xor(dst, opnd);
3367 #endif
3368 }
3369 
3370 // XOR 32-bit - dst = dst & imm
3371 void TurboAssembler::Xor(Register dst, const Operand& opnd) { xilf(dst, opnd); }
3372 
3373 // XOR Pointer Size - dst = dst & imm
3374 void TurboAssembler::XorP(Register dst, const Operand& opnd) {
3375 #if V8_TARGET_ARCH_S390X
3376   intptr_t value = opnd.immediate();
3377   xihf(dst, Operand(value >> 32));
3378   xilf(dst, Operand(value & 0xFFFFFFFF));
3379 #else
3380   Xor(dst, opnd);
3381 #endif
3382 }
3383 
3384 // XOR 32-bit - dst = src & imm
3385 void TurboAssembler::Xor(Register dst, Register src, const Operand& opnd) {
3386   if (dst != src) lr(dst, src);
3387   xilf(dst, opnd);
3388 }
3389 
3390 // XOR Pointer Size - dst = src & imm
3391 void TurboAssembler::XorP(Register dst, Register src, const Operand& opnd) {
3392   if (dst != src) mov(dst, src);
3393   XorP(dst, opnd);
3394 }
3395 
3396 void TurboAssembler::Not32(Register dst, Register src) {
3397   if (src != no_reg && src != dst) lr(dst, src);
3398   xilf(dst, Operand(0xFFFFFFFF));
3399 }
3400 
3401 void TurboAssembler::Not64(Register dst, Register src) {
3402   if (src != no_reg && src != dst) lgr(dst, src);
3403   xihf(dst, Operand(0xFFFFFFFF));
3404   xilf(dst, Operand(0xFFFFFFFF));
3405 }
3406 
3407 void TurboAssembler::NotP(Register dst, Register src) {
3408 #if V8_TARGET_ARCH_S390X
3409   Not64(dst, src);
3410 #else
3411   Not32(dst, src);
3412 #endif
3413 }
3414 
3415 void TurboAssembler::LoadPositiveP(Register result, Register input) {
3416 #if V8_TARGET_ARCH_S390X
3417   lpgr(result, input);
3418 #else
3419   lpr(result, input);
3420 #endif
3421 }
3422 
3423 void TurboAssembler::LoadPositive32(Register result, Register input) {
3424   lpr(result, input);
3425   lgfr(result, result);
3426 }
3427 
3428 //-----------------------------------------------------------------------------
3429 //  Compare Helpers
3430 //-----------------------------------------------------------------------------
3431 
3432 // Compare 32-bit Register vs Register
3433 void TurboAssembler::CmpS32(Register src1, Register src2) { cr_z(src1, src2); }
3434 
3435 // Compare Pointer Sized Register vs Register
3436 void TurboAssembler::CmpS64(Register src1, Register src2) { cgr(src1, src2); }
3437 
3438 // Compare 32-bit Register vs Immediate
3439 // This helper will set up proper relocation entries if required.
3440 void TurboAssembler::CmpS32(Register dst, const Operand& opnd) {
3441   if (opnd.rmode() == RelocInfo::NO_INFO) {
3442     intptr_t value = opnd.immediate();
3443     if (is_int16(value))
3444       chi(dst, opnd);
3445     else
3446       cfi(dst, opnd);
3447   } else {
3448     // Need to generate relocation record here
3449     RecordRelocInfo(opnd.rmode(), opnd.immediate());
3450     cfi(dst, opnd);
3451   }
3452 }
3453 
3454 // Compare Pointer Sized  Register vs Immediate
3455 // This helper will set up proper relocation entries if required.
3456 void TurboAssembler::CmpS64(Register dst, const Operand& opnd) {
3457   if (opnd.rmode() == RelocInfo::NO_INFO) {
3458     cgfi(dst, opnd);
3459   } else {
3460     mov(r0, opnd);  // Need to generate 64-bit relocation
3461     cgr(dst, r0);
3462   }
3463 }
3464 
3465 // Compare 32-bit Register vs Memory
3466 void TurboAssembler::CmpS32(Register dst, const MemOperand& opnd) {
3467   // make sure offset is within 20 bit range
3468   DCHECK(is_int20(opnd.offset()));
3469   if (is_uint12(opnd.offset()))
3470     c(dst, opnd);
3471   else
3472     cy(dst, opnd);
3473 }
3474 
3475 // Compare Pointer Size Register vs Memory
3476 void TurboAssembler::CmpS64(Register dst, const MemOperand& opnd) {
3477   // make sure offset is within 20 bit range
3478   DCHECK(is_int20(opnd.offset()));
3479   cg(dst, opnd);
3480 }
3481 
3482 // Using cs or scy based on the offset
3483 void TurboAssembler::CmpAndSwap(Register old_val, Register new_val,
3484                                 const MemOperand& opnd) {
3485   if (is_uint12(opnd.offset())) {
3486     cs(old_val, new_val, opnd);
3487   } else {
3488     csy(old_val, new_val, opnd);
3489   }
3490 }
3491 
3492 void TurboAssembler::CmpAndSwap64(Register old_val, Register new_val,
3493                                   const MemOperand& opnd) {
3494   DCHECK(is_int20(opnd.offset()));
3495   csg(old_val, new_val, opnd);
3496 }
3497 
3498 //-----------------------------------------------------------------------------
3499 // Compare Logical Helpers
3500 //-----------------------------------------------------------------------------
3501 
3502 // Compare Logical 32-bit Register vs Register
3503 void TurboAssembler::CmpU32(Register dst, Register src) { clr(dst, src); }
3504 
3505 // Compare Logical Pointer Sized Register vs Register
3506 void TurboAssembler::CmpU64(Register dst, Register src) {
3507 #ifdef V8_TARGET_ARCH_S390X
3508   clgr(dst, src);
3509 #else
3510   CmpU32(dst, src);
3511 #endif
3512 }
3513 
3514 // Compare Logical 32-bit Register vs Immediate
3515 void TurboAssembler::CmpU32(Register dst, const Operand& opnd) {
3516   clfi(dst, opnd);
3517 }
3518 
3519 // Compare Logical Pointer Sized Register vs Immediate
3520 void TurboAssembler::CmpU64(Register dst, const Operand& opnd) {
3521 #if V8_TARGET_ARCH_S390X
3522   DCHECK_EQ(static_cast<uint32_t>(opnd.immediate() >> 32), 0);
3523   clgfi(dst, opnd);
3524 #else
3525   CmpU32(dst, opnd);
3526 #endif
3527 }
3528 
3529 // Compare Logical 32-bit Register vs Memory
3530 void TurboAssembler::CmpU32(Register dst, const MemOperand& opnd) {
3531   // make sure offset is within 20 bit range
3532   DCHECK(is_int20(opnd.offset()));
3533   if (is_uint12(opnd.offset()))
3534     cl(dst, opnd);
3535   else
3536     cly(dst, opnd);
3537 }
3538 
3539 // Compare Logical Pointer Sized Register vs Memory
3540 void TurboAssembler::CmpU64(Register dst, const MemOperand& opnd) {
3541   // make sure offset is within 20 bit range
3542   DCHECK(is_int20(opnd.offset()));
3543 #if V8_TARGET_ARCH_S390X
3544   clg(dst, opnd);
3545 #else
3546   CmpU32(dst, opnd);
3547 #endif
3548 }
3549 
3550 void TurboAssembler::Branch(Condition c, const Operand& opnd) {
3551   intptr_t value = opnd.immediate();
3552   if (is_int16(value))
3553     brc(c, opnd);
3554   else
3555     brcl(c, opnd);
3556 }
3557 
3558 // Branch On Count.  Decrement R1, and branch if R1 != 0.
3559 void TurboAssembler::BranchOnCount(Register r1, Label* l) {
3560   int32_t offset = branch_offset(l);
3561   if (is_int16(offset)) {
3562 #if V8_TARGET_ARCH_S390X
3563     brctg(r1, Operand(offset));
3564 #else
3565     brct(r1, Operand(offset));
3566 #endif
3567   } else {
3568     AddS64(r1, Operand(-1));
3569     Branch(ne, Operand(offset));
3570   }
3571 }
3572 
3573 void TurboAssembler::LoadSmiLiteral(Register dst, Smi smi) {
3574   intptr_t value = static_cast<intptr_t>(smi.ptr());
3575 #if defined(V8_COMPRESS_POINTERS) || defined(V8_31BIT_SMIS_ON_64BIT_ARCH)
3576   llilf(dst, Operand(value));
3577 #else
3578   DCHECK_EQ(value & 0xFFFFFFFF, 0);
3579   // The smi value is loaded in upper 32-bits.  Lower 32-bit are zeros.
3580   llihf(dst, Operand(value >> 32));
3581 #endif
3582 }
3583 
3584 void TurboAssembler::CmpSmiLiteral(Register src1, Smi smi, Register scratch) {
3585 #if defined(V8_COMPRESS_POINTERS) || defined(V8_31BIT_SMIS_ON_64BIT_ARCH)
3586   // CFI takes 32-bit immediate.
3587   cfi(src1, Operand(smi));
3588 #else
3589   if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3590     cih(src1, Operand(static_cast<intptr_t>(smi.ptr()) >> 32));
3591   } else {
3592     LoadSmiLiteral(scratch, smi);
3593     cgr(src1, scratch);
3594   }
3595 #endif
3596 }
3597 
3598 void TurboAssembler::LoadU64(Register dst, const MemOperand& mem,
3599                              Register scratch) {
3600   int offset = mem.offset();
3601 
3602   MemOperand src = mem;
3603   if (!is_int20(offset)) {
3604     DCHECK(scratch != no_reg && scratch != r0 && mem.rx() == r0);
3605     DCHECK(scratch != mem.rb());
3606     mov(scratch, Operand(offset));
3607     src = MemOperand(mem.rb(), scratch);
3608   }
3609   lg(dst, src);
3610 }
3611 
3612 // Store a "pointer" sized value to the memory location
3613 void TurboAssembler::StoreU64(Register src, const MemOperand& mem,
3614                               Register scratch) {
3615   if (!is_int20(mem.offset())) {
3616     DCHECK(scratch != no_reg);
3617     DCHECK(scratch != r0);
3618     mov(scratch, Operand(mem.offset()));
3619     stg(src, MemOperand(mem.rb(), scratch));
3620   } else {
3621     stg(src, mem);
3622   }
3623 }
3624 
3625 // Store a "pointer" sized constant to the memory location
3626 void TurboAssembler::StoreU64(const MemOperand& mem, const Operand& opnd,
3627                               Register scratch) {
3628   // Relocations not supported
3629   DCHECK_EQ(opnd.rmode(), RelocInfo::NO_INFO);
3630 
3631   // Try to use MVGHI/MVHI
3632   if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT) && is_uint12(mem.offset()) &&
3633       mem.getIndexRegister() == r0 && is_int16(opnd.immediate())) {
3634     mvghi(mem, opnd);
3635   } else {
3636     mov(scratch, opnd);
3637     StoreU64(scratch, mem);
3638   }
3639 }
3640 
3641 void TurboAssembler::LoadMultipleP(Register dst1, Register dst2,
3642                                    const MemOperand& mem) {
3643 #if V8_TARGET_ARCH_S390X
3644   DCHECK(is_int20(mem.offset()));
3645   lmg(dst1, dst2, mem);
3646 #else
3647   if (is_uint12(mem.offset())) {
3648     lm(dst1, dst2, mem);
3649   } else {
3650     DCHECK(is_int20(mem.offset()));
3651     lmy(dst1, dst2, mem);
3652   }
3653 #endif
3654 }
3655 
3656 void TurboAssembler::StoreMultipleP(Register src1, Register src2,
3657                                     const MemOperand& mem) {
3658 #if V8_TARGET_ARCH_S390X
3659   DCHECK(is_int20(mem.offset()));
3660   stmg(src1, src2, mem);
3661 #else
3662   if (is_uint12(mem.offset())) {
3663     stm(src1, src2, mem);
3664   } else {
3665     DCHECK(is_int20(mem.offset()));
3666     stmy(src1, src2, mem);
3667   }
3668 #endif
3669 }
3670 
3671 void TurboAssembler::LoadMultipleW(Register dst1, Register dst2,
3672                                    const MemOperand& mem) {
3673   if (is_uint12(mem.offset())) {
3674     lm(dst1, dst2, mem);
3675   } else {
3676     DCHECK(is_int20(mem.offset()));
3677     lmy(dst1, dst2, mem);
3678   }
3679 }
3680 
3681 void TurboAssembler::StoreMultipleW(Register src1, Register src2,
3682                                     const MemOperand& mem) {
3683   if (is_uint12(mem.offset())) {
3684     stm(src1, src2, mem);
3685   } else {
3686     DCHECK(is_int20(mem.offset()));
3687     stmy(src1, src2, mem);
3688   }
3689 }
3690 
3691 // Load 32-bits and sign extend if necessary.
3692 void TurboAssembler::LoadS32(Register dst, Register src) {
3693 #if V8_TARGET_ARCH_S390X
3694   lgfr(dst, src);
3695 #else
3696   if (dst != src) lr(dst, src);
3697 #endif
3698 }
3699 
3700 // Load 32-bits and sign extend if necessary.
3701 void TurboAssembler::LoadS32(Register dst, const MemOperand& mem,
3702                            Register scratch) {
3703   int offset = mem.offset();
3704 
3705   if (!is_int20(offset)) {
3706     DCHECK(scratch != no_reg);
3707     mov(scratch, Operand(offset));
3708 #if V8_TARGET_ARCH_S390X
3709     lgf(dst, MemOperand(mem.rb(), scratch));
3710 #else
3711     l(dst, MemOperand(mem.rb(), scratch));
3712 #endif
3713   } else {
3714 #if V8_TARGET_ARCH_S390X
3715     lgf(dst, mem);
3716 #else
3717     if (is_uint12(offset)) {
3718       l(dst, mem);
3719     } else {
3720       ly(dst, mem);
3721     }
3722 #endif
3723   }
3724 }
3725 
3726 // Load 32-bits and zero extend if necessary.
3727 void TurboAssembler::LoadU32(Register dst, Register src) {
3728 #if V8_TARGET_ARCH_S390X
3729   llgfr(dst, src);
3730 #else
3731   if (dst != src) lr(dst, src);
3732 #endif
3733 }
3734 
3735 // Variable length depending on whether offset fits into immediate field
3736 // MemOperand of RX or RXY format
3737 void TurboAssembler::LoadU32(Register dst, const MemOperand& mem,
3738                             Register scratch) {
3739   Register base = mem.rb();
3740   int offset = mem.offset();
3741 
3742 #if V8_TARGET_ARCH_S390X
3743   if (is_int20(offset)) {
3744     llgf(dst, mem);
3745   } else if (scratch != no_reg) {
3746     // Materialize offset into scratch register.
3747     mov(scratch, Operand(offset));
3748     llgf(dst, MemOperand(base, scratch));
3749   } else {
3750     DCHECK(false);
3751   }
3752 #else
3753   bool use_RXform = false;
3754   bool use_RXYform = false;
3755   if (is_uint12(offset)) {
3756     // RX-format supports unsigned 12-bits offset.
3757     use_RXform = true;
3758   } else if (is_int20(offset)) {
3759     // RXY-format supports signed 20-bits offset.
3760     use_RXYform = true;
3761   } else if (scratch != no_reg) {
3762     // Materialize offset into scratch register.
3763     mov(scratch, Operand(offset));
3764   } else {
3765     DCHECK(false);
3766   }
3767 
3768   if (use_RXform) {
3769     l(dst, mem);
3770   } else if (use_RXYform) {
3771     ly(dst, mem);
3772   } else {
3773     ly(dst, MemOperand(base, scratch));
3774   }
3775 #endif
3776 }
3777 
3778 void TurboAssembler::LoadU16(Register dst, const MemOperand& mem) {
3779   // TODO(s390x): Add scratch reg
3780 #if V8_TARGET_ARCH_S390X
3781   llgh(dst, mem);
3782 #else
3783   llh(dst, mem);
3784 #endif
3785 }
3786 
3787 void TurboAssembler::LoadU16(Register dst, Register src) {
3788 #if V8_TARGET_ARCH_S390X
3789   llghr(dst, src);
3790 #else
3791   llhr(dst, src);
3792 #endif
3793 }
3794 
3795 void TurboAssembler::LoadS8(Register dst, const MemOperand& mem) {
3796   // TODO(s390x): Add scratch reg
3797 #if V8_TARGET_ARCH_S390X
3798   lgb(dst, mem);
3799 #else
3800   lb(dst, mem);
3801 #endif
3802 }
3803 
3804 void TurboAssembler::LoadS8(Register dst, Register src) {
3805 #if V8_TARGET_ARCH_S390X
3806   lgbr(dst, src);
3807 #else
3808   lbr(dst, src);
3809 #endif
3810 }
3811 
3812 void TurboAssembler::LoadU8(Register dst, const MemOperand& mem) {
3813   // TODO(s390x): Add scratch reg
3814 #if V8_TARGET_ARCH_S390X
3815   llgc(dst, mem);
3816 #else
3817   llc(dst, mem);
3818 #endif
3819 }
3820 
3821 void TurboAssembler::LoadU8(Register dst, Register src) {
3822 #if V8_TARGET_ARCH_S390X
3823   llgcr(dst, src);
3824 #else
3825   llcr(dst, src);
3826 #endif
3827 }
3828 
3829 #ifdef V8_TARGET_BIG_ENDIAN
3830 void TurboAssembler::LoadU64LE(Register dst, const MemOperand& mem,
3831                                Register scratch) {
3832   lrvg(dst, mem);
3833 }
3834 
3835 void TurboAssembler::LoadS32LE(Register dst, const MemOperand& opnd,
3836                                Register scratch) {
3837   lrv(dst, opnd);
3838   LoadS32(dst, dst);
3839 }
3840 
3841 void TurboAssembler::LoadU32LE(Register dst, const MemOperand& opnd,
3842                                Register scratch) {
3843   lrv(dst, opnd);
3844   LoadU32(dst, dst);
3845 }
3846 
3847 void TurboAssembler::LoadU16LE(Register dst, const MemOperand& opnd) {
3848   lrvh(dst, opnd);
3849   LoadU16(dst, dst);
3850 }
3851 
3852 void TurboAssembler::LoadS16LE(Register dst, const MemOperand& opnd) {
3853   lrvh(dst, opnd);
3854   LoadS16(dst, dst);
3855 }
3856 
3857 void TurboAssembler::LoadV128LE(DoubleRegister dst, const MemOperand& opnd,
3858                                 Register scratch0, Register scratch1) {
3859   bool use_vlbr = CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
3860                   is_uint12(opnd.offset());
3861   if (use_vlbr) {
3862     vlbr(dst, opnd, Condition(4));
3863   } else {
3864     lrvg(scratch0, opnd);
3865     lrvg(scratch1,
3866          MemOperand(opnd.rx(), opnd.rb(), opnd.offset() + kSystemPointerSize));
3867     vlvgp(dst, scratch1, scratch0);
3868   }
3869 }
3870 
3871 void TurboAssembler::LoadF64LE(DoubleRegister dst, const MemOperand& opnd,
3872                                Register scratch) {
3873   lrvg(scratch, opnd);
3874   ldgr(dst, scratch);
3875 }
3876 
3877 void TurboAssembler::LoadF32LE(DoubleRegister dst, const MemOperand& opnd,
3878                                Register scratch) {
3879   lrv(scratch, opnd);
3880   ShiftLeftU64(scratch, scratch, Operand(32));
3881   ldgr(dst, scratch);
3882 }
3883 
3884 void TurboAssembler::StoreU64LE(Register src, const MemOperand& mem,
3885                                 Register scratch) {
3886   if (!is_int20(mem.offset())) {
3887     DCHECK(scratch != no_reg);
3888     DCHECK(scratch != r0);
3889     mov(scratch, Operand(mem.offset()));
3890     strvg(src, MemOperand(mem.rb(), scratch));
3891   } else {
3892     strvg(src, mem);
3893   }
3894 }
3895 
3896 void TurboAssembler::StoreU32LE(Register src, const MemOperand& mem,
3897                                 Register scratch) {
3898   if (!is_int20(mem.offset())) {
3899     DCHECK(scratch != no_reg);
3900     DCHECK(scratch != r0);
3901     mov(scratch, Operand(mem.offset()));
3902     strv(src, MemOperand(mem.rb(), scratch));
3903   } else {
3904     strv(src, mem);
3905   }
3906 }
3907 
3908 void TurboAssembler::StoreU16LE(Register src, const MemOperand& mem,
3909                                 Register scratch) {
3910   if (!is_int20(mem.offset())) {
3911     DCHECK(scratch != no_reg);
3912     DCHECK(scratch != r0);
3913     mov(scratch, Operand(mem.offset()));
3914     strvh(src, MemOperand(mem.rb(), scratch));
3915   } else {
3916     strvh(src, mem);
3917   }
3918 }
3919 
3920 void TurboAssembler::StoreF64LE(DoubleRegister src, const MemOperand& opnd,
3921                                 Register scratch) {
3922   DCHECK(is_uint12(opnd.offset()));
3923   lgdr(scratch, src);
3924   strvg(scratch, opnd);
3925 }
3926 
3927 void TurboAssembler::StoreF32LE(DoubleRegister src, const MemOperand& opnd,
3928                                 Register scratch) {
3929   DCHECK(is_uint12(opnd.offset()));
3930   lgdr(scratch, src);
3931   ShiftRightU64(scratch, scratch, Operand(32));
3932   strv(scratch, opnd);
3933 }
3934 
3935 void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
3936                                  Register scratch1, Register scratch2) {
3937   bool use_vstbr = CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
3938                    is_uint12(mem.offset());
3939   if (use_vstbr) {
3940     vstbr(src, mem, Condition(4));
3941   } else {
3942     vlgv(scratch1, src, MemOperand(r0, 1), Condition(3));
3943     vlgv(scratch2, src, MemOperand(r0, 0), Condition(3));
3944     strvg(scratch1, mem);
3945     strvg(scratch2,
3946           MemOperand(mem.rx(), mem.rb(), mem.offset() + kSystemPointerSize));
3947   }
3948 }
3949 
3950 #else
3951 void TurboAssembler::LoadU64LE(Register dst, const MemOperand& mem,
3952                                Register scratch) {
3953   LoadU64(dst, mem, scratch);
3954 }
3955 
3956 void TurboAssembler::LoadS32LE(Register dst, const MemOperand& opnd,
3957                                Register scratch) {
3958   LoadS32(dst, opnd, scratch);
3959 }
3960 
3961 void TurboAssembler::LoadU32LE(Register dst, const MemOperand& opnd,
3962                                Register scratch) {
3963   LoadU32(dst, opnd, scratch);
3964 }
3965 
3966 void TurboAssembler::LoadU16LE(Register dst, const MemOperand& opnd) {
3967   LoadU16(dst, opnd);
3968 }
3969 
3970 void TurboAssembler::LoadS16LE(Register dst, const MemOperand& opnd) {
3971   LoadS16(dst, opnd);
3972 }
3973 
3974 void TurboAssembler::LoadV128LE(DoubleRegister dst, const MemOperand& opnd,
3975                                 Register scratch0, Register scratch1) {
3976   USE(scratch1);
3977   LoadV128(dst, opnd, scratch0);
3978 }
3979 
3980 void TurboAssembler::LoadF64LE(DoubleRegister dst, const MemOperand& opnd,
3981                                Register scratch) {
3982   USE(scratch);
3983   LoadF64(dst, opnd);
3984 }
3985 
3986 void TurboAssembler::LoadF32LE(DoubleRegister dst, const MemOperand& opnd,
3987                                Register scratch) {
3988   USE(scratch);
3989   LoadF32(dst, opnd);
3990 }
3991 
3992 void TurboAssembler::StoreU64LE(Register src, const MemOperand& mem,
3993                                 Register scratch) {
3994   StoreU64(src, mem, scratch);
3995 }
3996 
3997 void TurboAssembler::StoreU32LE(Register src, const MemOperand& mem,
3998                                 Register scratch) {
3999   StoreU32(src, mem, scratch);
4000 }
4001 
4002 void TurboAssembler::StoreU16LE(Register src, const MemOperand& mem,
4003                                 Register scratch) {
4004   StoreU16(src, mem, scratch);
4005 }
4006 
4007 void TurboAssembler::StoreF64LE(DoubleRegister src, const MemOperand& opnd,
4008                                 Register scratch) {
4009   StoreF64(src, opnd);
4010 }
4011 
4012 void TurboAssembler::StoreF32LE(DoubleRegister src, const MemOperand& opnd,
4013                                 Register scratch) {
4014   StoreF32(src, opnd);
4015 }
4016 
4017 void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
4018                                  Register scratch1, Register scratch2) {
4019   StoreV128(src, mem, scratch1);
4020 }
4021 
4022 #endif
4023 
4024 // Load And Test (Reg <- Reg)
4025 void TurboAssembler::LoadAndTest32(Register dst, Register src) {
4026   ltr(dst, src);
4027 }
4028 
4029 // Load And Test Pointer Sized (Reg <- Reg)
4030 void TurboAssembler::LoadAndTestP(Register dst, Register src) {
4031 #if V8_TARGET_ARCH_S390X
4032   ltgr(dst, src);
4033 #else
4034   ltr(dst, src);
4035 #endif
4036 }
4037 
4038 // Load And Test 32-bit (Reg <- Mem)
4039 void TurboAssembler::LoadAndTest32(Register dst, const MemOperand& mem) {
4040   lt_z(dst, mem);
4041 }
4042 
4043 // Load And Test Pointer Sized (Reg <- Mem)
4044 void TurboAssembler::LoadAndTestP(Register dst, const MemOperand& mem) {
4045 #if V8_TARGET_ARCH_S390X
4046   ltg(dst, mem);
4047 #else
4048   lt_z(dst, mem);
4049 #endif
4050 }
4051 
4052 // Load On Condition Pointer Sized (Reg <- Reg)
4053 void TurboAssembler::LoadOnConditionP(Condition cond, Register dst,
4054                                       Register src) {
4055 #if V8_TARGET_ARCH_S390X
4056   locgr(cond, dst, src);
4057 #else
4058   locr(cond, dst, src);
4059 #endif
4060 }
4061 
4062 // Load Double Precision (64-bit) Floating Point number from memory
4063 void TurboAssembler::LoadF64(DoubleRegister dst, const MemOperand& mem) {
4064   // for 32bit and 64bit we all use 64bit floating point regs
4065   if (is_uint12(mem.offset())) {
4066     ld(dst, mem);
4067   } else {
4068     ldy(dst, mem);
4069   }
4070 }
4071 
4072 // Load Single Precision (32-bit) Floating Point number from memory
4073 void TurboAssembler::LoadF32(DoubleRegister dst, const MemOperand& mem) {
4074   if (is_uint12(mem.offset())) {
4075     le_z(dst, mem);
4076   } else {
4077     DCHECK(is_int20(mem.offset()));
4078     ley(dst, mem);
4079   }
4080 }
4081 
4082 void TurboAssembler::LoadV128(Simd128Register dst, const MemOperand& mem,
4083                               Register scratch) {
4084   DCHECK(scratch != r0);
4085   if (is_uint12(mem.offset())) {
4086     vl(dst, mem, Condition(0));
4087   } else {
4088     DCHECK(is_int20(mem.offset()));
4089     lay(scratch, mem);
4090     vl(dst, MemOperand(scratch), Condition(0));
4091   }
4092 }
4093 
4094 // Store Double Precision (64-bit) Floating Point number to memory
4095 void TurboAssembler::StoreF64(DoubleRegister dst, const MemOperand& mem) {
4096   if (is_uint12(mem.offset())) {
4097     std(dst, mem);
4098   } else {
4099     stdy(dst, mem);
4100   }
4101 }
4102 
4103 // Store Single Precision (32-bit) Floating Point number to memory
4104 void TurboAssembler::StoreF32(DoubleRegister src, const MemOperand& mem) {
4105   if (is_uint12(mem.offset())) {
4106     ste(src, mem);
4107   } else {
4108     stey(src, mem);
4109   }
4110 }
4111 
4112 void TurboAssembler::StoreV128(Simd128Register src, const MemOperand& mem,
4113                                Register scratch) {
4114   DCHECK(scratch != r0);
4115   if (is_uint12(mem.offset())) {
4116     vst(src, mem, Condition(0));
4117   } else {
4118     DCHECK(is_int20(mem.offset()));
4119     lay(scratch, mem);
4120     vst(src, MemOperand(scratch), Condition(0));
4121   }
4122 }
4123 
4124 void TurboAssembler::AddF32(DoubleRegister dst, DoubleRegister lhs,
4125                             DoubleRegister rhs) {
4126   if (dst == lhs) {
4127     aebr(dst, rhs);
4128   } else if (dst == rhs) {
4129     aebr(dst, lhs);
4130   } else {
4131     ler(dst, lhs);
4132     aebr(dst, rhs);
4133   }
4134 }
4135 
4136 void TurboAssembler::SubF32(DoubleRegister dst, DoubleRegister lhs,
4137                             DoubleRegister rhs) {
4138   if (dst == lhs) {
4139     sebr(dst, rhs);
4140   } else if (dst == rhs) {
4141     sebr(dst, lhs);
4142     lcebr(dst, dst);
4143   } else {
4144     ler(dst, lhs);
4145     sebr(dst, rhs);
4146   }
4147 }
4148 
4149 void TurboAssembler::MulF32(DoubleRegister dst, DoubleRegister lhs,
4150                             DoubleRegister rhs) {
4151   if (dst == lhs) {
4152     meebr(dst, rhs);
4153   } else if (dst == rhs) {
4154     meebr(dst, lhs);
4155   } else {
4156     ler(dst, lhs);
4157     meebr(dst, rhs);
4158   }
4159 }
4160 
4161 void TurboAssembler::DivF32(DoubleRegister dst, DoubleRegister lhs,
4162                             DoubleRegister rhs) {
4163   if (dst == lhs) {
4164     debr(dst, rhs);
4165   } else if (dst == rhs) {
4166     lay(sp, MemOperand(sp, -kSystemPointerSize));
4167     StoreF32(dst, MemOperand(sp));
4168     ler(dst, lhs);
4169     deb(dst, MemOperand(sp));
4170     la(sp, MemOperand(sp, kSystemPointerSize));
4171   } else {
4172     ler(dst, lhs);
4173     debr(dst, rhs);
4174   }
4175 }
4176 
4177 void TurboAssembler::AddF64(DoubleRegister dst, DoubleRegister lhs,
4178                             DoubleRegister rhs) {
4179   if (dst == lhs) {
4180     adbr(dst, rhs);
4181   } else if (dst == rhs) {
4182     adbr(dst, lhs);
4183   } else {
4184     ldr(dst, lhs);
4185     adbr(dst, rhs);
4186   }
4187 }
4188 
4189 void TurboAssembler::SubF64(DoubleRegister dst, DoubleRegister lhs,
4190                             DoubleRegister rhs) {
4191   if (dst == lhs) {
4192     sdbr(dst, rhs);
4193   } else if (dst == rhs) {
4194     sdbr(dst, lhs);
4195     lcdbr(dst, dst);
4196   } else {
4197     ldr(dst, lhs);
4198     sdbr(dst, rhs);
4199   }
4200 }
4201 
4202 void TurboAssembler::MulF64(DoubleRegister dst, DoubleRegister lhs,
4203                             DoubleRegister rhs) {
4204   if (dst == lhs) {
4205     mdbr(dst, rhs);
4206   } else if (dst == rhs) {
4207     mdbr(dst, lhs);
4208   } else {
4209     ldr(dst, lhs);
4210     mdbr(dst, rhs);
4211   }
4212 }
4213 
4214 void TurboAssembler::DivF64(DoubleRegister dst, DoubleRegister lhs,
4215                             DoubleRegister rhs) {
4216   if (dst == lhs) {
4217     ddbr(dst, rhs);
4218   } else if (dst == rhs) {
4219     lay(sp, MemOperand(sp, -kSystemPointerSize));
4220     StoreF64(dst, MemOperand(sp));
4221     ldr(dst, lhs);
4222     ddb(dst, MemOperand(sp));
4223     la(sp, MemOperand(sp, kSystemPointerSize));
4224   } else {
4225     ldr(dst, lhs);
4226     ddbr(dst, rhs);
4227   }
4228 }
4229 
4230 void TurboAssembler::AddFloat32(DoubleRegister dst, const MemOperand& opnd,
4231                                 DoubleRegister scratch) {
4232   if (is_uint12(opnd.offset())) {
4233     aeb(dst, opnd);
4234   } else {
4235     ley(scratch, opnd);
4236     aebr(dst, scratch);
4237   }
4238 }
4239 
4240 void TurboAssembler::AddFloat64(DoubleRegister dst, const MemOperand& opnd,
4241                                 DoubleRegister scratch) {
4242   if (is_uint12(opnd.offset())) {
4243     adb(dst, opnd);
4244   } else {
4245     ldy(scratch, opnd);
4246     adbr(dst, scratch);
4247   }
4248 }
4249 
4250 void TurboAssembler::SubFloat32(DoubleRegister dst, const MemOperand& opnd,
4251                                 DoubleRegister scratch) {
4252   if (is_uint12(opnd.offset())) {
4253     seb(dst, opnd);
4254   } else {
4255     ley(scratch, opnd);
4256     sebr(dst, scratch);
4257   }
4258 }
4259 
4260 void TurboAssembler::SubFloat64(DoubleRegister dst, const MemOperand& opnd,
4261                                 DoubleRegister scratch) {
4262   if (is_uint12(opnd.offset())) {
4263     sdb(dst, opnd);
4264   } else {
4265     ldy(scratch, opnd);
4266     sdbr(dst, scratch);
4267   }
4268 }
4269 
4270 void TurboAssembler::MulFloat32(DoubleRegister dst, const MemOperand& opnd,
4271                                 DoubleRegister scratch) {
4272   if (is_uint12(opnd.offset())) {
4273     meeb(dst, opnd);
4274   } else {
4275     ley(scratch, opnd);
4276     meebr(dst, scratch);
4277   }
4278 }
4279 
4280 void TurboAssembler::MulFloat64(DoubleRegister dst, const MemOperand& opnd,
4281                                 DoubleRegister scratch) {
4282   if (is_uint12(opnd.offset())) {
4283     mdb(dst, opnd);
4284   } else {
4285     ldy(scratch, opnd);
4286     mdbr(dst, scratch);
4287   }
4288 }
4289 
4290 void TurboAssembler::DivFloat32(DoubleRegister dst, const MemOperand& opnd,
4291                                 DoubleRegister scratch) {
4292   if (is_uint12(opnd.offset())) {
4293     deb(dst, opnd);
4294   } else {
4295     ley(scratch, opnd);
4296     debr(dst, scratch);
4297   }
4298 }
4299 
4300 void TurboAssembler::DivFloat64(DoubleRegister dst, const MemOperand& opnd,
4301                                 DoubleRegister scratch) {
4302   if (is_uint12(opnd.offset())) {
4303     ddb(dst, opnd);
4304   } else {
4305     ldy(scratch, opnd);
4306     ddbr(dst, scratch);
4307   }
4308 }
4309 
4310 void TurboAssembler::LoadF32AsF64(DoubleRegister dst, const MemOperand& opnd,
4311                                   DoubleRegister scratch) {
4312   if (is_uint12(opnd.offset())) {
4313     ldeb(dst, opnd);
4314   } else {
4315     ley(scratch, opnd);
4316     ldebr(dst, scratch);
4317   }
4318 }
4319 
4320 // Variable length depending on whether offset fits into immediate field
4321 // MemOperand of RX or RXY format
4322 void TurboAssembler::StoreU32(Register src, const MemOperand& mem,
4323                               Register scratch) {
4324   Register base = mem.rb();
4325   int offset = mem.offset();
4326 
4327   bool use_RXform = false;
4328   bool use_RXYform = false;
4329 
4330   if (is_uint12(offset)) {
4331     // RX-format supports unsigned 12-bits offset.
4332     use_RXform = true;
4333   } else if (is_int20(offset)) {
4334     // RXY-format supports signed 20-bits offset.
4335     use_RXYform = true;
4336   } else if (scratch != no_reg) {
4337     // Materialize offset into scratch register.
4338     mov(scratch, Operand(offset));
4339   } else {
4340     // scratch is no_reg
4341     DCHECK(false);
4342   }
4343 
4344   if (use_RXform) {
4345     st(src, mem);
4346   } else if (use_RXYform) {
4347     sty(src, mem);
4348   } else {
4349     StoreU32(src, MemOperand(base, scratch));
4350   }
4351 }
4352 
4353 void TurboAssembler::LoadS16(Register dst, Register src) {
4354 #if V8_TARGET_ARCH_S390X
4355   lghr(dst, src);
4356 #else
4357   lhr(dst, src);
4358 #endif
4359 }
4360 
4361 // Loads 16-bits half-word value from memory and sign extends to pointer
4362 // sized register
4363 void TurboAssembler::LoadS16(Register dst, const MemOperand& mem,
4364                                    Register scratch) {
4365   Register base = mem.rb();
4366   int offset = mem.offset();
4367 
4368   if (!is_int20(offset)) {
4369     DCHECK(scratch != no_reg);
4370     mov(scratch, Operand(offset));
4371 #if V8_TARGET_ARCH_S390X
4372     lgh(dst, MemOperand(base, scratch));
4373 #else
4374     lh(dst, MemOperand(base, scratch));
4375 #endif
4376   } else {
4377 #if V8_TARGET_ARCH_S390X
4378     lgh(dst, mem);
4379 #else
4380     if (is_uint12(offset)) {
4381       lh(dst, mem);
4382     } else {
4383       lhy(dst, mem);
4384     }
4385 #endif
4386   }
4387 }
4388 
4389 // Variable length depending on whether offset fits into immediate field
4390 // MemOperand current only supports d-form
4391 void TurboAssembler::StoreU16(Register src, const MemOperand& mem,
4392                               Register scratch) {
4393   Register base = mem.rb();
4394   int offset = mem.offset();
4395 
4396   if (is_uint12(offset)) {
4397     sth(src, mem);
4398   } else if (is_int20(offset)) {
4399     sthy(src, mem);
4400   } else {
4401     DCHECK(scratch != no_reg);
4402     mov(scratch, Operand(offset));
4403     sth(src, MemOperand(base, scratch));
4404   }
4405 }
4406 
4407 // Variable length depending on whether offset fits into immediate field
4408 // MemOperand current only supports d-form
4409 void TurboAssembler::StoreU8(Register src, const MemOperand& mem,
4410                              Register scratch) {
4411   Register base = mem.rb();
4412   int offset = mem.offset();
4413 
4414   if (is_uint12(offset)) {
4415     stc(src, mem);
4416   } else if (is_int20(offset)) {
4417     stcy(src, mem);
4418   } else {
4419     DCHECK(scratch != no_reg);
4420     mov(scratch, Operand(offset));
4421     stc(src, MemOperand(base, scratch));
4422   }
4423 }
4424 
4425 // Shift left logical for 32-bit integer types.
4426 void TurboAssembler::ShiftLeftU32(Register dst, Register src,
4427                                   const Operand& val) {
4428   ShiftLeftU32(dst, src, r0, val);
4429 }
4430 
4431 // Shift left logical for 32-bit integer types.
4432 void TurboAssembler::ShiftLeftU32(Register dst, Register src, Register val,
4433                                   const Operand& val2) {
4434   if (dst == src) {
4435     sll(dst, val, val2);
4436   } else if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
4437     sllk(dst, src, val, val2);
4438   } else {
4439     DCHECK(dst != val || val == r0);  // The lr/sll path clobbers val.
4440     lr(dst, src);
4441     sll(dst, val, val2);
4442   }
4443 }
4444 
4445 // Shift left logical for 32-bit integer types.
4446 void TurboAssembler::ShiftLeftU64(Register dst, Register src,
4447                                   const Operand& val) {
4448   ShiftLeftU64(dst, src, r0, val);
4449 }
4450 
4451 // Shift left logical for 32-bit integer types.
4452 void TurboAssembler::ShiftLeftU64(Register dst, Register src, Register val,
4453                                   const Operand& val2) {
4454   sllg(dst, src, val, val2);
4455 }
4456 
4457 // Shift right logical for 32-bit integer types.
4458 void TurboAssembler::ShiftRightU32(Register dst, Register src,
4459                                    const Operand& val) {
4460   ShiftRightU32(dst, src, r0, val);
4461 }
4462 
4463 // Shift right logical for 32-bit integer types.
4464 void TurboAssembler::ShiftRightU32(Register dst, Register src, Register val,
4465                                    const Operand& val2) {
4466   if (dst == src) {
4467     srl(dst, val, val2);
4468   } else if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
4469     srlk(dst, src, val, val2);
4470   } else {
4471     DCHECK(dst != val || val == r0);  // The lr/srl path clobbers val.
4472     lr(dst, src);
4473     srl(dst, val, val2);
4474   }
4475 }
4476 
4477 void TurboAssembler::ShiftRightU64(Register dst, Register src, Register val,
4478                                    const Operand& val2) {
4479   srlg(dst, src, val, val2);
4480 }
4481 
4482 // Shift right logical for 64-bit integer types.
4483 void TurboAssembler::ShiftRightU64(Register dst, Register src,
4484                                    const Operand& val) {
4485   ShiftRightU64(dst, src, r0, val);
4486 }
4487 
4488 // Shift right arithmetic for 32-bit integer types.
4489 void TurboAssembler::ShiftRightS32(Register dst, Register src,
4490                                    const Operand& val) {
4491   ShiftRightS32(dst, src, r0, val);
4492 }
4493 
4494 // Shift right arithmetic for 32-bit integer types.
4495 void TurboAssembler::ShiftRightS32(Register dst, Register src, Register val,
4496                                    const Operand& val2) {
4497   if (dst == src) {
4498     sra(dst, val, val2);
4499   } else if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
4500     srak(dst, src, val, val2);
4501   } else {
4502     DCHECK(dst != val || val == r0);  // The lr/sra path clobbers val.
4503     lr(dst, src);
4504     sra(dst, val, val2);
4505   }
4506 }
4507 
4508 // Shift right arithmetic for 64-bit integer types.
4509 void TurboAssembler::ShiftRightS64(Register dst, Register src,
4510                                    const Operand& val) {
4511   ShiftRightS64(dst, src, r0, val);
4512 }
4513 
4514 // Shift right arithmetic for 64-bit integer types.
4515 void TurboAssembler::ShiftRightS64(Register dst, Register src, Register val,
4516                                    const Operand& val2) {
4517   srag(dst, src, val, val2);
4518 }
4519 
4520 // Clear right most # of bits
4521 void TurboAssembler::ClearRightImm(Register dst, Register src,
4522                                    const Operand& val) {
4523   int numBitsToClear = val.immediate() % (kSystemPointerSize * 8);
4524 
4525   // Try to use RISBG if possible
4526   if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
4527     int endBit = 63 - numBitsToClear;
4528     RotateInsertSelectBits(dst, src, Operand::Zero(), Operand(endBit),
4529                            Operand::Zero(), true);
4530     return;
4531   }
4532 
4533   uint64_t hexMask = ~((1L << numBitsToClear) - 1);
4534 
4535   // S390 AND instr clobbers source.  Make a copy if necessary
4536   if (dst != src) mov(dst, src);
4537 
4538   if (numBitsToClear <= 16) {
4539     nill(dst, Operand(static_cast<uint16_t>(hexMask)));
4540   } else if (numBitsToClear <= 32) {
4541     nilf(dst, Operand(static_cast<uint32_t>(hexMask)));
4542   } else if (numBitsToClear <= 64) {
4543     nilf(dst, Operand(static_cast<intptr_t>(0)));
4544     nihf(dst, Operand(hexMask >> 32));
4545   }
4546 }
4547 
4548 void TurboAssembler::Popcnt32(Register dst, Register src) {
4549   DCHECK(src != r0);
4550   DCHECK(dst != r0);
4551 
4552   popcnt(dst, src);
4553   ShiftRightU32(r0, dst, Operand(16));
4554   ar(dst, r0);
4555   ShiftRightU32(r0, dst, Operand(8));
4556   ar(dst, r0);
4557   llgcr(dst, dst);
4558 }
4559 
4560 #ifdef V8_TARGET_ARCH_S390X
4561 void TurboAssembler::Popcnt64(Register dst, Register src) {
4562   DCHECK(src != r0);
4563   DCHECK(dst != r0);
4564 
4565   popcnt(dst, src);
4566   ShiftRightU64(r0, dst, Operand(32));
4567   AddS64(dst, r0);
4568   ShiftRightU64(r0, dst, Operand(16));
4569   AddS64(dst, r0);
4570   ShiftRightU64(r0, dst, Operand(8));
4571   AddS64(dst, r0);
4572   LoadU8(dst, dst);
4573 }
4574 #endif
4575 
4576 void TurboAssembler::SwapP(Register src, Register dst, Register scratch) {
4577   if (src == dst) return;
4578   DCHECK(!AreAliased(src, dst, scratch));
4579   mov(scratch, src);
4580   mov(src, dst);
4581   mov(dst, scratch);
4582 }
4583 
4584 void TurboAssembler::SwapP(Register src, MemOperand dst, Register scratch) {
4585   if (dst.rx() != r0) DCHECK(!AreAliased(src, dst.rx(), scratch));
4586   if (dst.rb() != r0) DCHECK(!AreAliased(src, dst.rb(), scratch));
4587   DCHECK(!AreAliased(src, scratch));
4588   mov(scratch, src);
4589   LoadU64(src, dst);
4590   StoreU64(scratch, dst);
4591 }
4592 
4593 void TurboAssembler::SwapP(MemOperand src, MemOperand dst, Register scratch_0,
4594                            Register scratch_1) {
4595   if (src.rx() != r0) DCHECK(!AreAliased(src.rx(), scratch_0, scratch_1));
4596   if (src.rb() != r0) DCHECK(!AreAliased(src.rb(), scratch_0, scratch_1));
4597   if (dst.rx() != r0) DCHECK(!AreAliased(dst.rx(), scratch_0, scratch_1));
4598   if (dst.rb() != r0) DCHECK(!AreAliased(dst.rb(), scratch_0, scratch_1));
4599   DCHECK(!AreAliased(scratch_0, scratch_1));
4600   LoadU64(scratch_0, src);
4601   LoadU64(scratch_1, dst);
4602   StoreU64(scratch_0, dst);
4603   StoreU64(scratch_1, src);
4604 }
4605 
4606 void TurboAssembler::SwapFloat32(DoubleRegister src, DoubleRegister dst,
4607                                  DoubleRegister scratch) {
4608   if (src == dst) return;
4609   DCHECK(!AreAliased(src, dst, scratch));
4610   ldr(scratch, src);
4611   ldr(src, dst);
4612   ldr(dst, scratch);
4613 }
4614 
4615 void TurboAssembler::SwapFloat32(DoubleRegister src, MemOperand dst,
4616                                  DoubleRegister scratch) {
4617   DCHECK(!AreAliased(src, scratch));
4618   ldr(scratch, src);
4619   LoadF32(src, dst);
4620   StoreF32(scratch, dst);
4621 }
4622 
4623 void TurboAssembler::SwapFloat32(MemOperand src, MemOperand dst,
4624                                  DoubleRegister scratch) {
4625   // push d0, to be used as scratch
4626   lay(sp, MemOperand(sp, -kDoubleSize));
4627   StoreF64(d0, MemOperand(sp));
4628   LoadF32(scratch, src);
4629   LoadF32(d0, dst);
4630   StoreF32(scratch, dst);
4631   StoreF32(d0, src);
4632   // restore d0
4633   LoadF64(d0, MemOperand(sp));
4634   lay(sp, MemOperand(sp, kDoubleSize));
4635 }
4636 
4637 void TurboAssembler::SwapDouble(DoubleRegister src, DoubleRegister dst,
4638                                 DoubleRegister scratch) {
4639   if (src == dst) return;
4640   DCHECK(!AreAliased(src, dst, scratch));
4641   ldr(scratch, src);
4642   ldr(src, dst);
4643   ldr(dst, scratch);
4644 }
4645 
4646 void TurboAssembler::SwapDouble(DoubleRegister src, MemOperand dst,
4647                                 DoubleRegister scratch) {
4648   DCHECK(!AreAliased(src, scratch));
4649   ldr(scratch, src);
4650   LoadF64(src, dst);
4651   StoreF64(scratch, dst);
4652 }
4653 
4654 void TurboAssembler::SwapDouble(MemOperand src, MemOperand dst,
4655                                 DoubleRegister scratch) {
4656   // push d0, to be used as scratch
4657   lay(sp, MemOperand(sp, -kDoubleSize));
4658   StoreF64(d0, MemOperand(sp));
4659   LoadF64(scratch, src);
4660   LoadF64(d0, dst);
4661   StoreF64(scratch, dst);
4662   StoreF64(d0, src);
4663   // restore d0
4664   LoadF64(d0, MemOperand(sp));
4665   lay(sp, MemOperand(sp, kDoubleSize));
4666 }
4667 
4668 void TurboAssembler::SwapSimd128(Simd128Register src, Simd128Register dst,
4669                                  Simd128Register scratch) {
4670   if (src == dst) return;
4671   vlr(scratch, src, Condition(0), Condition(0), Condition(0));
4672   vlr(src, dst, Condition(0), Condition(0), Condition(0));
4673   vlr(dst, scratch, Condition(0), Condition(0), Condition(0));
4674 }
4675 
4676 void TurboAssembler::SwapSimd128(Simd128Register src, MemOperand dst,
4677                                  Simd128Register scratch) {
4678   DCHECK(!AreAliased(src, scratch));
4679   vlr(scratch, src, Condition(0), Condition(0), Condition(0));
4680   LoadV128(src, dst, ip);
4681   StoreV128(scratch, dst, ip);
4682 }
4683 
4684 void TurboAssembler::SwapSimd128(MemOperand src, MemOperand dst,
4685                                  Simd128Register scratch) {
4686   // push d0, to be used as scratch
4687   lay(sp, MemOperand(sp, -kSimd128Size));
4688   StoreV128(d0, MemOperand(sp), ip);
4689   LoadV128(scratch, src, ip);
4690   LoadV128(d0, dst, ip);
4691   StoreV128(scratch, dst, ip);
4692   StoreV128(d0, src, ip);
4693   // restore d0
4694   LoadV128(d0, MemOperand(sp), ip);
4695   lay(sp, MemOperand(sp, kSimd128Size));
4696 }
4697 
4698 void TurboAssembler::ComputeCodeStartAddress(Register dst) {
4699   larl(dst, Operand(-pc_offset() / 2));
4700 }
4701 
4702 void TurboAssembler::LoadPC(Register dst) {
4703   Label current_pc;
4704   larl(dst, &current_pc);
4705   bind(&current_pc);
4706 }
4707 
4708 void TurboAssembler::JumpIfEqual(Register x, int32_t y, Label* dest) {
4709   CmpS32(x, Operand(y));
4710   beq(dest);
4711 }
4712 
4713 void TurboAssembler::JumpIfLessThan(Register x, int32_t y, Label* dest) {
4714   CmpS32(x, Operand(y));
4715   blt(dest);
4716 }
4717 
4718 void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin_index) {
4719   STATIC_ASSERT(kSystemPointerSize == 8);
4720   STATIC_ASSERT(kSmiTagSize == 1);
4721   STATIC_ASSERT(kSmiTag == 0);
4722   // The builtin_index register contains the builtin index as a Smi.
4723   if (SmiValuesAre32Bits()) {
4724     ShiftRightS64(builtin_index, builtin_index,
4725                   Operand(kSmiShift - kSystemPointerSizeLog2));
4726   } else {
4727     DCHECK(SmiValuesAre31Bits());
4728     ShiftLeftU64(builtin_index, builtin_index,
4729                  Operand(kSystemPointerSizeLog2 - kSmiShift));
4730   }
4731   LoadU64(builtin_index, MemOperand(kRootRegister, builtin_index,
4732                                     IsolateData::builtin_entry_table_offset()));
4733 }
4734 
4735 void TurboAssembler::CallBuiltinByIndex(Register builtin_index) {
4736   LoadEntryFromBuiltinIndex(builtin_index);
4737   Call(builtin_index);
4738 }
4739 
4740 void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin,
4741                                           Register destination) {
4742   ASM_CODE_COMMENT(this);
4743   LoadU64(destination, EntryFromBuiltinAsOperand(builtin));
4744 }
4745 
4746 MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) {
4747   ASM_CODE_COMMENT(this);
4748   DCHECK(root_array_available());
4749   return MemOperand(kRootRegister,
4750                     IsolateData::BuiltinEntrySlotOffset(builtin));
4751 }
4752 
4753 void TurboAssembler::LoadCodeObjectEntry(Register destination,
4754                                          Register code_object) {
4755   // Code objects are called differently depending on whether we are generating
4756   // builtin code (which will later be embedded into the binary) or compiling
4757   // user JS code at runtime.
4758   // * Builtin code runs in --jitless mode and thus must not call into on-heap
4759   //   Code targets. Instead, we dispatch through the builtins entry table.
4760   // * Codegen at runtime does not have this restriction and we can use the
4761   //   shorter, branchless instruction sequence. The assumption here is that
4762   //   targets are usually generated code and not builtin Code objects.
4763 
4764   if (options().isolate_independent_code) {
4765     DCHECK(root_array_available());
4766     Label if_code_is_off_heap, out;
4767 
4768     Register scratch = r1;
4769 
4770     DCHECK(!AreAliased(destination, scratch));
4771     DCHECK(!AreAliased(code_object, scratch));
4772 
4773     // Check whether the Code object is an off-heap trampoline. If so, call its
4774     // (off-heap) entry point directly without going through the (on-heap)
4775     // trampoline.  Otherwise, just call the Code object as always.
4776     LoadS32(scratch, FieldMemOperand(code_object, Code::kFlagsOffset));
4777     tmlh(scratch, Operand(Code::IsOffHeapTrampoline::kMask >> 16));
4778     bne(&if_code_is_off_heap);
4779 
4780     // Not an off-heap trampoline, the entry point is at
4781     // Code::raw_instruction_start().
4782     AddS64(destination, code_object,
4783            Operand(Code::kHeaderSize - kHeapObjectTag));
4784     b(&out);
4785 
4786     // An off-heap trampoline, the entry point is loaded from the builtin entry
4787     // table.
4788     bind(&if_code_is_off_heap);
4789     LoadS32(scratch, FieldMemOperand(code_object, Code::kBuiltinIndexOffset));
4790     ShiftLeftU64(destination, scratch, Operand(kSystemPointerSizeLog2));
4791     AddS64(destination, destination, kRootRegister);
4792     LoadU64(destination,
4793             MemOperand(destination, IsolateData::builtin_entry_table_offset()));
4794 
4795     bind(&out);
4796   } else {
4797     AddS64(destination, code_object,
4798            Operand(Code::kHeaderSize - kHeapObjectTag));
4799   }
4800 }
4801 
4802 void TurboAssembler::CallCodeObject(Register code_object) {
4803   LoadCodeObjectEntry(code_object, code_object);
4804   Call(code_object);
4805 }
4806 
4807 void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) {
4808   DCHECK_EQ(JumpMode::kJump, jump_mode);
4809   LoadCodeObjectEntry(code_object, code_object);
4810   Jump(code_object);
4811 }
4812 
4813 void TurboAssembler::StoreReturnAddressAndCall(Register target) {
4814   // This generates the final instruction sequence for calls to C functions
4815   // once an exit frame has been constructed.
4816   //
4817   // Note that this assumes the caller code (i.e. the Code object currently
4818   // being generated) is immovable or that the callee function cannot trigger
4819   // GC, since the callee function will return to it.
4820 
4821   Label return_label;
4822   larl(r14, &return_label);  // Generate the return addr of call later.
4823   StoreU64(r14, MemOperand(sp, kStackFrameRASlot * kSystemPointerSize));
4824 
4825   // zLinux ABI requires caller's frame to have sufficient space for callee
4826   // preserved regsiter save area.
4827   b(target);
4828   bind(&return_label);
4829 }
4830 
4831 void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
4832                                            DeoptimizeKind kind, Label* ret,
4833                                            Label*) {
4834   ASM_CODE_COMMENT(this);
4835   LoadU64(ip, MemOperand(kRootRegister,
4836                          IsolateData::BuiltinEntrySlotOffset(target)));
4837   Call(ip);
4838   DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
4839             (kind == DeoptimizeKind::kLazy) ? Deoptimizer::kLazyDeoptExitSize
4840                                             : Deoptimizer::kEagerDeoptExitSize);
4841 }
4842 
4843 void TurboAssembler::Trap() { stop(); }
4844 void TurboAssembler::DebugBreak() { stop(); }
4845 
4846 void TurboAssembler::CountLeadingZerosU32(Register dst, Register src,
4847                                           Register scratch_pair) {
4848   llgfr(dst, src);
4849   flogr(scratch_pair,
4850         dst);  // will modify a register pair scratch and scratch + 1
4851   AddS32(dst, scratch_pair, Operand(-32));
4852 }
4853 
4854 void TurboAssembler::CountLeadingZerosU64(Register dst, Register src,
4855                                           Register scratch_pair) {
4856   flogr(scratch_pair,
4857         src);  // will modify a register pair scratch and scratch + 1
4858   mov(dst, scratch_pair);
4859 }
4860 
4861 void TurboAssembler::CountTrailingZerosU32(Register dst, Register src,
4862                                            Register scratch_pair) {
4863   Register scratch0 = scratch_pair;
4864   Register scratch1 = Register::from_code(scratch_pair.code() + 1);
4865   DCHECK(!AreAliased(dst, scratch0, scratch1));
4866   DCHECK(!AreAliased(src, scratch0, scratch1));
4867 
4868   Label done;
4869   // Check if src is all zeros.
4870   ltr(scratch1, src);
4871   mov(dst, Operand(32));
4872   beq(&done);
4873   llgfr(scratch1, scratch1);
4874   lcgr(scratch0, scratch1);
4875   ngr(scratch1, scratch0);
4876   flogr(scratch0, scratch1);
4877   mov(dst, Operand(63));
4878   SubS64(dst, scratch0);
4879   bind(&done);
4880 }
4881 
4882 void TurboAssembler::CountTrailingZerosU64(Register dst, Register src,
4883                                            Register scratch_pair) {
4884   Register scratch0 = scratch_pair;
4885   Register scratch1 = Register::from_code(scratch_pair.code() + 1);
4886   DCHECK(!AreAliased(dst, scratch0, scratch1));
4887   DCHECK(!AreAliased(src, scratch0, scratch1));
4888 
4889   Label done;
4890   // Check if src is all zeros.
4891   ltgr(scratch1, src);
4892   mov(dst, Operand(64));
4893   beq(&done);
4894   lcgr(scratch0, scratch1);
4895   ngr(scratch0, scratch1);
4896   flogr(scratch0, scratch0);
4897   mov(dst, Operand(63));
4898   SubS64(dst, scratch0);
4899   bind(&done);
4900 }
4901 
4902 void TurboAssembler::AtomicCmpExchangeHelper(Register addr, Register output,
4903                                              Register old_value,
4904                                              Register new_value, int start,
4905                                              int end, int shift_amount,
4906                                              int offset, Register temp0,
4907                                              Register temp1) {
4908   LoadU32(temp0, MemOperand(addr, offset));
4909   llgfr(temp1, temp0);
4910   RotateInsertSelectBits(temp0, old_value, Operand(start), Operand(end),
4911                          Operand(shift_amount), false);
4912   RotateInsertSelectBits(temp1, new_value, Operand(start), Operand(end),
4913                          Operand(shift_amount), false);
4914   CmpAndSwap(temp0, temp1, MemOperand(addr, offset));
4915   RotateInsertSelectBits(output, temp0, Operand(start + shift_amount),
4916                          Operand(end + shift_amount),
4917                          Operand(64 - shift_amount), true);
4918 }
4919 
4920 void TurboAssembler::AtomicCmpExchangeU8(Register addr, Register output,
4921                                          Register old_value, Register new_value,
4922                                          Register temp0, Register temp1) {
4923 #ifdef V8_TARGET_BIG_ENDIAN
4924 #define ATOMIC_COMP_EXCHANGE_BYTE(i)                                        \
4925   {                                                                         \
4926     constexpr int idx = (i);                                                \
4927     static_assert(idx <= 3 && idx >= 0, "idx is out of range!");            \
4928     constexpr int start = 32 + 8 * idx;                                     \
4929     constexpr int end = start + 7;                                          \
4930     constexpr int shift_amount = (3 - idx) * 8;                             \
4931     AtomicCmpExchangeHelper(addr, output, old_value, new_value, start, end, \
4932                             shift_amount, -idx, temp0, temp1);              \
4933   }
4934 #else
4935 #define ATOMIC_COMP_EXCHANGE_BYTE(i)                                        \
4936   {                                                                         \
4937     constexpr int idx = (i);                                                \
4938     static_assert(idx <= 3 && idx >= 0, "idx is out of range!");            \
4939     constexpr int start = 32 + 8 * (3 - idx);                               \
4940     constexpr int end = start + 7;                                          \
4941     constexpr int shift_amount = idx * 8;                                   \
4942     AtomicCmpExchangeHelper(addr, output, old_value, new_value, start, end, \
4943                             shift_amount, -idx, temp0, temp1);              \
4944   }
4945 #endif
4946 
4947   Label one, two, three, done;
4948   tmll(addr, Operand(3));
4949   b(Condition(1), &three);
4950   b(Condition(2), &two);
4951   b(Condition(4), &one);
4952   /* ending with 0b00 */
4953   ATOMIC_COMP_EXCHANGE_BYTE(0);
4954   b(&done);
4955   /* ending with 0b01 */
4956   bind(&one);
4957   ATOMIC_COMP_EXCHANGE_BYTE(1);
4958   b(&done);
4959   /* ending with 0b10 */
4960   bind(&two);
4961   ATOMIC_COMP_EXCHANGE_BYTE(2);
4962   b(&done);
4963   /* ending with 0b11 */
4964   bind(&three);
4965   ATOMIC_COMP_EXCHANGE_BYTE(3);
4966   bind(&done);
4967 }
4968 
4969 void TurboAssembler::AtomicCmpExchangeU16(Register addr, Register output,
4970                                           Register old_value,
4971                                           Register new_value, Register temp0,
4972                                           Register temp1) {
4973 #ifdef V8_TARGET_BIG_ENDIAN
4974 #define ATOMIC_COMP_EXCHANGE_HALFWORD(i)                                    \
4975   {                                                                         \
4976     constexpr int idx = (i);                                                \
4977     static_assert(idx <= 1 && idx >= 0, "idx is out of range!");            \
4978     constexpr int start = 32 + 16 * idx;                                    \
4979     constexpr int end = start + 15;                                         \
4980     constexpr int shift_amount = (1 - idx) * 16;                            \
4981     AtomicCmpExchangeHelper(addr, output, old_value, new_value, start, end, \
4982                             shift_amount, -idx * 2, temp0, temp1);          \
4983   }
4984 #else
4985 #define ATOMIC_COMP_EXCHANGE_HALFWORD(i)                                    \
4986   {                                                                         \
4987     constexpr int idx = (i);                                                \
4988     static_assert(idx <= 1 && idx >= 0, "idx is out of range!");            \
4989     constexpr int start = 32 + 16 * (1 - idx);                              \
4990     constexpr int end = start + 15;                                         \
4991     constexpr int shift_amount = idx * 16;                                  \
4992     AtomicCmpExchangeHelper(addr, output, old_value, new_value, start, end, \
4993                             shift_amount, -idx * 2, temp0, temp1);          \
4994   }
4995 #endif
4996 
4997   Label two, done;
4998   tmll(addr, Operand(3));
4999   b(Condition(2), &two);
5000   ATOMIC_COMP_EXCHANGE_HALFWORD(0);
5001   b(&done);
5002   bind(&two);
5003   ATOMIC_COMP_EXCHANGE_HALFWORD(1);
5004   bind(&done);
5005 }
5006 
5007 void TurboAssembler::AtomicExchangeHelper(Register addr, Register value,
5008                                           Register output, int start, int end,
5009                                           int shift_amount, int offset,
5010                                           Register scratch) {
5011   Label do_cs;
5012   LoadU32(output, MemOperand(addr, offset));
5013   bind(&do_cs);
5014   llgfr(scratch, output);
5015   RotateInsertSelectBits(scratch, value, Operand(start), Operand(end),
5016                          Operand(shift_amount), false);
5017   csy(output, scratch, MemOperand(addr, offset));
5018   bne(&do_cs, Label::kNear);
5019   srl(output, Operand(shift_amount));
5020 }
5021 
5022 void TurboAssembler::AtomicExchangeU8(Register addr, Register value,
5023                                       Register output, Register scratch) {
5024 #ifdef V8_TARGET_BIG_ENDIAN
5025 #define ATOMIC_EXCHANGE_BYTE(i)                                               \
5026   {                                                                           \
5027     constexpr int idx = (i);                                                  \
5028     static_assert(idx <= 3 && idx >= 0, "idx is out of range!");              \
5029     constexpr int start = 32 + 8 * idx;                                       \
5030     constexpr int end = start + 7;                                            \
5031     constexpr int shift_amount = (3 - idx) * 8;                               \
5032     AtomicExchangeHelper(addr, value, output, start, end, shift_amount, -idx, \
5033                          scratch);                                            \
5034   }
5035 #else
5036 #define ATOMIC_EXCHANGE_BYTE(i)                                               \
5037   {                                                                           \
5038     constexpr int idx = (i);                                                  \
5039     static_assert(idx <= 3 && idx >= 0, "idx is out of range!");              \
5040     constexpr int start = 32 + 8 * (3 - idx);                                 \
5041     constexpr int end = start + 7;                                            \
5042     constexpr int shift_amount = idx * 8;                                     \
5043     AtomicExchangeHelper(addr, value, output, start, end, shift_amount, -idx, \
5044                          scratch);                                            \
5045   }
5046 #endif
5047   Label three, two, one, done;
5048   tmll(addr, Operand(3));
5049   b(Condition(1), &three);
5050   b(Condition(2), &two);
5051   b(Condition(4), &one);
5052 
5053   // end with 0b00
5054   ATOMIC_EXCHANGE_BYTE(0);
5055   b(&done);
5056 
5057   // ending with 0b01
5058   bind(&one);
5059   ATOMIC_EXCHANGE_BYTE(1);
5060   b(&done);
5061 
5062   // ending with 0b10
5063   bind(&two);
5064   ATOMIC_EXCHANGE_BYTE(2);
5065   b(&done);
5066 
5067   // ending with 0b11
5068   bind(&three);
5069   ATOMIC_EXCHANGE_BYTE(3);
5070 
5071   bind(&done);
5072 }
5073 
5074 void TurboAssembler::AtomicExchangeU16(Register addr, Register value,
5075                                        Register output, Register scratch) {
5076 #ifdef V8_TARGET_BIG_ENDIAN
5077 #define ATOMIC_EXCHANGE_HALFWORD(i)                                     \
5078   {                                                                     \
5079     constexpr int idx = (i);                                            \
5080     static_assert(idx <= 1 && idx >= 0, "idx is out of range!");        \
5081     constexpr int start = 32 + 16 * idx;                                \
5082     constexpr int end = start + 15;                                     \
5083     constexpr int shift_amount = (1 - idx) * 16;                        \
5084     AtomicExchangeHelper(addr, value, output, start, end, shift_amount, \
5085                          -idx * 2, scratch);                            \
5086   }
5087 #else
5088 #define ATOMIC_EXCHANGE_HALFWORD(i)                                     \
5089   {                                                                     \
5090     constexpr int idx = (i);                                            \
5091     static_assert(idx <= 1 && idx >= 0, "idx is out of range!");        \
5092     constexpr int start = 32 + 16 * (1 - idx);                          \
5093     constexpr int end = start + 15;                                     \
5094     constexpr int shift_amount = idx * 16;                              \
5095     AtomicExchangeHelper(addr, value, output, start, end, shift_amount, \
5096                          -idx * 2, scratch);                            \
5097   }
5098 #endif
5099   Label two, done;
5100   tmll(addr, Operand(3));
5101   b(Condition(2), &two);
5102 
5103   // end with 0b00
5104   ATOMIC_EXCHANGE_HALFWORD(0);
5105   b(&done);
5106 
5107   // ending with 0b10
5108   bind(&two);
5109   ATOMIC_EXCHANGE_HALFWORD(1);
5110 
5111   bind(&done);
5112 }
5113 
5114 // Simd Support.
5115 void TurboAssembler::F64x2Splat(Simd128Register dst, Simd128Register src) {
5116   vrep(dst, src, Operand(0), Condition(3));
5117 }
5118 
5119 void TurboAssembler::F32x4Splat(Simd128Register dst, Simd128Register src) {
5120   vrep(dst, src, Operand(0), Condition(2));
5121 }
5122 
5123 void TurboAssembler::I64x2Splat(Simd128Register dst, Register src) {
5124   vlvg(dst, src, MemOperand(r0, 0), Condition(3));
5125   vrep(dst, dst, Operand(0), Condition(3));
5126 }
5127 
5128 void TurboAssembler::I32x4Splat(Simd128Register dst, Register src) {
5129   vlvg(dst, src, MemOperand(r0, 0), Condition(2));
5130   vrep(dst, dst, Operand(0), Condition(2));
5131 }
5132 
5133 void TurboAssembler::I16x8Splat(Simd128Register dst, Register src) {
5134   vlvg(dst, src, MemOperand(r0, 0), Condition(1));
5135   vrep(dst, dst, Operand(0), Condition(1));
5136 }
5137 
5138 void TurboAssembler::I8x16Splat(Simd128Register dst, Register src) {
5139   vlvg(dst, src, MemOperand(r0, 0), Condition(0));
5140   vrep(dst, dst, Operand(0), Condition(0));
5141 }
5142 
5143 void TurboAssembler::F64x2ExtractLane(DoubleRegister dst, Simd128Register src,
5144                                       uint8_t imm_lane_idx, Register) {
5145   vrep(dst, src, Operand(1 - imm_lane_idx), Condition(3));
5146 }
5147 
5148 void TurboAssembler::F32x4ExtractLane(DoubleRegister dst, Simd128Register src,
5149                                       uint8_t imm_lane_idx, Register) {
5150   vrep(dst, src, Operand(3 - imm_lane_idx), Condition(2));
5151 }
5152 
5153 void TurboAssembler::I64x2ExtractLane(Register dst, Simd128Register src,
5154                                       uint8_t imm_lane_idx, Register) {
5155   vlgv(dst, src, MemOperand(r0, 1 - imm_lane_idx), Condition(3));
5156 }
5157 
5158 void TurboAssembler::I32x4ExtractLane(Register dst, Simd128Register src,
5159                                       uint8_t imm_lane_idx, Register) {
5160   vlgv(dst, src, MemOperand(r0, 3 - imm_lane_idx), Condition(2));
5161 }
5162 
5163 void TurboAssembler::I16x8ExtractLaneU(Register dst, Simd128Register src,
5164                                        uint8_t imm_lane_idx, Register) {
5165   vlgv(dst, src, MemOperand(r0, 7 - imm_lane_idx), Condition(1));
5166 }
5167 
5168 void TurboAssembler::I16x8ExtractLaneS(Register dst, Simd128Register src,
5169                                        uint8_t imm_lane_idx, Register scratch) {
5170   vlgv(scratch, src, MemOperand(r0, 7 - imm_lane_idx), Condition(1));
5171   lghr(dst, scratch);
5172 }
5173 
5174 void TurboAssembler::I8x16ExtractLaneU(Register dst, Simd128Register src,
5175                                        uint8_t imm_lane_idx, Register) {
5176   vlgv(dst, src, MemOperand(r0, 15 - imm_lane_idx), Condition(0));
5177 }
5178 
5179 void TurboAssembler::I8x16ExtractLaneS(Register dst, Simd128Register src,
5180                                        uint8_t imm_lane_idx, Register scratch) {
5181   vlgv(scratch, src, MemOperand(r0, 15 - imm_lane_idx), Condition(0));
5182   lgbr(dst, scratch);
5183 }
5184 
5185 void TurboAssembler::F64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
5186                                       DoubleRegister src2, uint8_t imm_lane_idx,
5187                                       Register scratch) {
5188   vlgv(scratch, src2, MemOperand(r0, 0), Condition(3));
5189   if (src1 != dst) {
5190     vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5191   }
5192   vlvg(dst, scratch, MemOperand(r0, 1 - imm_lane_idx), Condition(3));
5193 }
5194 
5195 void TurboAssembler::F32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
5196                                       DoubleRegister src2, uint8_t imm_lane_idx,
5197                                       Register scratch) {
5198   vlgv(scratch, src2, MemOperand(r0, 0), Condition(2));
5199   if (src1 != dst) {
5200     vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5201   }
5202   vlvg(dst, scratch, MemOperand(r0, 3 - imm_lane_idx), Condition(2));
5203 }
5204 
5205 void TurboAssembler::I64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
5206                                       Register src2, uint8_t imm_lane_idx,
5207                                       Register) {
5208   if (src1 != dst) {
5209     vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5210   }
5211   vlvg(dst, src2, MemOperand(r0, 1 - imm_lane_idx), Condition(3));
5212 }
5213 
5214 void TurboAssembler::I32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
5215                                       Register src2, uint8_t imm_lane_idx,
5216                                       Register) {
5217   if (src1 != dst) {
5218     vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5219   }
5220   vlvg(dst, src2, MemOperand(r0, 3 - imm_lane_idx), Condition(2));
5221 }
5222 
5223 void TurboAssembler::I16x8ReplaceLane(Simd128Register dst, Simd128Register src1,
5224                                       Register src2, uint8_t imm_lane_idx,
5225                                       Register) {
5226   if (src1 != dst) {
5227     vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5228   }
5229   vlvg(dst, src2, MemOperand(r0, 7 - imm_lane_idx), Condition(1));
5230 }
5231 
5232 void TurboAssembler::I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
5233                                       Register src2, uint8_t imm_lane_idx,
5234                                       Register) {
5235   if (src1 != dst) {
5236     vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5237   }
5238   vlvg(dst, src2, MemOperand(r0, 15 - imm_lane_idx), Condition(0));
5239 }
5240 
5241 void TurboAssembler::S128Not(Simd128Register dst, Simd128Register src) {
5242   vno(dst, src, src, Condition(0), Condition(0), Condition(0));
5243 }
5244 
5245 void TurboAssembler::S128Zero(Simd128Register dst, Simd128Register src) {
5246   vx(dst, src, src, Condition(0), Condition(0), Condition(0));
5247 }
5248 
5249 void TurboAssembler::S128AllOnes(Simd128Register dst, Simd128Register src) {
5250   vceq(dst, src, src, Condition(0), Condition(3));
5251 }
5252 
5253 void TurboAssembler::S128Select(Simd128Register dst, Simd128Register src1,
5254                                 Simd128Register src2, Simd128Register mask) {
5255   vsel(dst, src1, src2, mask, Condition(0), Condition(0));
5256 }
5257 
5258 #define SIMD_UNOP_LIST_VRR_A(V)             \
5259   V(F64x2Abs, vfpso, 2, 0, 3)               \
5260   V(F64x2Neg, vfpso, 0, 0, 3)               \
5261   V(F64x2Sqrt, vfsq, 0, 0, 3)               \
5262   V(F64x2Ceil, vfi, 6, 0, 3)                \
5263   V(F64x2Floor, vfi, 7, 0, 3)               \
5264   V(F64x2Trunc, vfi, 5, 0, 3)               \
5265   V(F64x2NearestInt, vfi, 4, 0, 3)          \
5266   V(F32x4Abs, vfpso, 2, 0, 2)               \
5267   V(F32x4Neg, vfpso, 0, 0, 2)               \
5268   V(F32x4Sqrt, vfsq, 0, 0, 2)               \
5269   V(F32x4Ceil, vfi, 6, 0, 2)                \
5270   V(F32x4Floor, vfi, 7, 0, 2)               \
5271   V(F32x4Trunc, vfi, 5, 0, 2)               \
5272   V(F32x4NearestInt, vfi, 4, 0, 2)          \
5273   V(I64x2Abs, vlp, 0, 0, 3)                 \
5274   V(I64x2Neg, vlc, 0, 0, 3)                 \
5275   V(I64x2SConvertI32x4Low, vupl, 0, 0, 2)   \
5276   V(I64x2SConvertI32x4High, vuph, 0, 0, 2)  \
5277   V(I64x2UConvertI32x4Low, vupll, 0, 0, 2)  \
5278   V(I64x2UConvertI32x4High, vuplh, 0, 0, 2) \
5279   V(I32x4Abs, vlp, 0, 0, 2)                 \
5280   V(I32x4Neg, vlc, 0, 0, 2)                 \
5281   V(I32x4SConvertI16x8Low, vupl, 0, 0, 1)   \
5282   V(I32x4SConvertI16x8High, vuph, 0, 0, 1)  \
5283   V(I32x4UConvertI16x8Low, vupll, 0, 0, 1)  \
5284   V(I32x4UConvertI16x8High, vuplh, 0, 0, 1) \
5285   V(I16x8Abs, vlp, 0, 0, 1)                 \
5286   V(I16x8Neg, vlc, 0, 0, 1)                 \
5287   V(I16x8SConvertI8x16Low, vupl, 0, 0, 0)   \
5288   V(I16x8SConvertI8x16High, vuph, 0, 0, 0)  \
5289   V(I16x8UConvertI8x16Low, vupll, 0, 0, 0)  \
5290   V(I16x8UConvertI8x16High, vuplh, 0, 0, 0) \
5291   V(I8x16Abs, vlp, 0, 0, 0)                 \
5292   V(I8x16Neg, vlc, 0, 0, 0)                 \
5293   V(I8x16Popcnt, vpopct, 0, 0, 0)
5294 
5295 #define EMIT_SIMD_UNOP_VRR_A(name, op, c1, c2, c3)                      \
5296   void TurboAssembler::name(Simd128Register dst, Simd128Register src) { \
5297     op(dst, src, Condition(c1), Condition(c2), Condition(c3));          \
5298   }
5299 SIMD_UNOP_LIST_VRR_A(EMIT_SIMD_UNOP_VRR_A)
5300 #undef EMIT_SIMD_UNOP_VRR_A
5301 #undef SIMD_UNOP_LIST_VRR_A
5302 
5303 #define SIMD_BINOP_LIST_VRR_B(V) \
5304   V(I64x2Eq, vceq, 0, 3)         \
5305   V(I64x2GtS, vch, 0, 3)         \
5306   V(I32x4Eq, vceq, 0, 2)         \
5307   V(I32x4GtS, vch, 0, 2)         \
5308   V(I32x4GtU, vchl, 0, 2)        \
5309   V(I16x8Eq, vceq, 0, 1)         \
5310   V(I16x8GtS, vch, 0, 1)         \
5311   V(I16x8GtU, vchl, 0, 1)        \
5312   V(I8x16Eq, vceq, 0, 0)         \
5313   V(I8x16GtS, vch, 0, 0)         \
5314   V(I8x16GtU, vchl, 0, 0)
5315 
5316 #define EMIT_SIMD_BINOP_VRR_B(name, op, c1, c2)                        \
5317   void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
5318                             Simd128Register src2) {                    \
5319     op(dst, src1, src2, Condition(c1), Condition(c2));                 \
5320   }
5321 SIMD_BINOP_LIST_VRR_B(EMIT_SIMD_BINOP_VRR_B)
5322 #undef EMIT_SIMD_BINOP_VRR_B
5323 #undef SIMD_BINOP_LIST_VRR_B
5324 
5325 #define SIMD_BINOP_LIST_VRR_C(V)           \
5326   V(F64x2Add, vfa, 0, 0, 3)                \
5327   V(F64x2Sub, vfs, 0, 0, 3)                \
5328   V(F64x2Mul, vfm, 0, 0, 3)                \
5329   V(F64x2Div, vfd, 0, 0, 3)                \
5330   V(F64x2Min, vfmin, 1, 0, 3)              \
5331   V(F64x2Max, vfmax, 1, 0, 3)              \
5332   V(F64x2Eq, vfce, 0, 0, 3)                \
5333   V(F64x2Pmin, vfmin, 3, 0, 3)             \
5334   V(F64x2Pmax, vfmax, 3, 0, 3)             \
5335   V(F32x4Add, vfa, 0, 0, 2)                \
5336   V(F32x4Sub, vfs, 0, 0, 2)                \
5337   V(F32x4Mul, vfm, 0, 0, 2)                \
5338   V(F32x4Div, vfd, 0, 0, 2)                \
5339   V(F32x4Min, vfmin, 1, 0, 2)              \
5340   V(F32x4Max, vfmax, 1, 0, 2)              \
5341   V(F32x4Eq, vfce, 0, 0, 2)                \
5342   V(F32x4Pmin, vfmin, 3, 0, 2)             \
5343   V(F32x4Pmax, vfmax, 3, 0, 2)             \
5344   V(I64x2Add, va, 0, 0, 3)                 \
5345   V(I64x2Sub, vs, 0, 0, 3)                 \
5346   V(I32x4Add, va, 0, 0, 2)                 \
5347   V(I32x4Sub, vs, 0, 0, 2)                 \
5348   V(I32x4Mul, vml, 0, 0, 2)                \
5349   V(I32x4MinS, vmn, 0, 0, 2)               \
5350   V(I32x4MinU, vmnl, 0, 0, 2)              \
5351   V(I32x4MaxS, vmx, 0, 0, 2)               \
5352   V(I32x4MaxU, vmxl, 0, 0, 2)              \
5353   V(I16x8Add, va, 0, 0, 1)                 \
5354   V(I16x8Sub, vs, 0, 0, 1)                 \
5355   V(I16x8Mul, vml, 0, 0, 1)                \
5356   V(I16x8MinS, vmn, 0, 0, 1)               \
5357   V(I16x8MinU, vmnl, 0, 0, 1)              \
5358   V(I16x8MaxS, vmx, 0, 0, 1)               \
5359   V(I16x8MaxU, vmxl, 0, 0, 1)              \
5360   V(I16x8RoundingAverageU, vavgl, 0, 0, 1) \
5361   V(I8x16Add, va, 0, 0, 0)                 \
5362   V(I8x16Sub, vs, 0, 0, 0)                 \
5363   V(I8x16MinS, vmn, 0, 0, 0)               \
5364   V(I8x16MinU, vmnl, 0, 0, 0)              \
5365   V(I8x16MaxS, vmx, 0, 0, 0)               \
5366   V(I8x16MaxU, vmxl, 0, 0, 0)              \
5367   V(I8x16RoundingAverageU, vavgl, 0, 0, 0) \
5368   V(S128And, vn, 0, 0, 0)                  \
5369   V(S128Or, vo, 0, 0, 0)                   \
5370   V(S128Xor, vx, 0, 0, 0)                  \
5371   V(S128AndNot, vnc, 0, 0, 0)
5372 
5373 #define EMIT_SIMD_BINOP_VRR_C(name, op, c1, c2, c3)                    \
5374   void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
5375                             Simd128Register src2) {                    \
5376     op(dst, src1, src2, Condition(c1), Condition(c2), Condition(c3));  \
5377   }
5378 SIMD_BINOP_LIST_VRR_C(EMIT_SIMD_BINOP_VRR_C)
5379 #undef EMIT_SIMD_BINOP_VRR_C
5380 #undef SIMD_BINOP_LIST_VRR_C
5381 
5382 #define SIMD_SHIFT_LIST(V) \
5383   V(I64x2Shl, veslv, 3)    \
5384   V(I64x2ShrS, vesrav, 3)  \
5385   V(I64x2ShrU, vesrlv, 3)  \
5386   V(I32x4Shl, veslv, 2)    \
5387   V(I32x4ShrS, vesrav, 2)  \
5388   V(I32x4ShrU, vesrlv, 2)  \
5389   V(I16x8Shl, veslv, 1)    \
5390   V(I16x8ShrS, vesrav, 1)  \
5391   V(I16x8ShrU, vesrlv, 1)  \
5392   V(I8x16Shl, veslv, 0)    \
5393   V(I8x16ShrS, vesrav, 0)  \
5394   V(I8x16ShrU, vesrlv, 0)
5395 
5396 #define EMIT_SIMD_SHIFT(name, op, c1)                                  \
5397   void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
5398                             Register src2, Simd128Register scratch) {  \
5399     vlvg(scratch, src2, MemOperand(r0, 0), Condition(c1));             \
5400     vrep(scratch, scratch, Operand(0), Condition(c1));                 \
5401     op(dst, src1, scratch, Condition(0), Condition(0), Condition(c1)); \
5402   }                                                                    \
5403   void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
5404                             const Operand& src2, Register scratch1,    \
5405                             Simd128Register scratch2) {                \
5406     mov(scratch1, src2);                                               \
5407     name(dst, src1, scratch1, scratch2);                               \
5408   }
5409 SIMD_SHIFT_LIST(EMIT_SIMD_SHIFT)
5410 #undef EMIT_SIMD_SHIFT
5411 #undef SIMD_SHIFT_LIST
5412 
5413 #define SIMD_EXT_MUL_LIST(V)                    \
5414   V(I64x2ExtMulLowI32x4S, vme, vmo, vmrl, 2)    \
5415   V(I64x2ExtMulHighI32x4S, vme, vmo, vmrh, 2)   \
5416   V(I64x2ExtMulLowI32x4U, vmle, vmlo, vmrl, 2)  \
5417   V(I64x2ExtMulHighI32x4U, vmle, vmlo, vmrh, 2) \
5418   V(I32x4ExtMulLowI16x8S, vme, vmo, vmrl, 1)    \
5419   V(I32x4ExtMulHighI16x8S, vme, vmo, vmrh, 1)   \
5420   V(I32x4ExtMulLowI16x8U, vmle, vmlo, vmrl, 1)  \
5421   V(I32x4ExtMulHighI16x8U, vmle, vmlo, vmrh, 1) \
5422   V(I16x8ExtMulLowI8x16S, vme, vmo, vmrl, 0)    \
5423   V(I16x8ExtMulHighI8x16S, vme, vmo, vmrh, 0)   \
5424   V(I16x8ExtMulLowI8x16U, vmle, vmlo, vmrl, 0)  \
5425   V(I16x8ExtMulHighI8x16U, vmle, vmlo, vmrh, 0)
5426 
5427 #define EMIT_SIMD_EXT_MUL(name, mul_even, mul_odd, merge, mode)                \
5428   void TurboAssembler::name(Simd128Register dst, Simd128Register src1,         \
5429                             Simd128Register src2, Simd128Register scratch) {   \
5430     mul_even(scratch, src1, src2, Condition(0), Condition(0),                  \
5431              Condition(mode));                                                 \
5432     mul_odd(dst, src1, src2, Condition(0), Condition(0), Condition(mode));     \
5433     merge(dst, scratch, dst, Condition(0), Condition(0), Condition(mode + 1)); \
5434   }
5435 SIMD_EXT_MUL_LIST(EMIT_SIMD_EXT_MUL)
5436 #undef EMIT_SIMD_EXT_MUL
5437 #undef SIMD_EXT_MUL_LIST
5438 
5439 #define SIMD_ALL_TRUE_LIST(V) \
5440   V(I64x2AllTrue, 3)          \
5441   V(I32x4AllTrue, 2)          \
5442   V(I16x8AllTrue, 1)          \
5443   V(I8x16AllTrue, 0)
5444 
5445 #define EMIT_SIMD_ALL_TRUE(name, mode)                                     \
5446   void TurboAssembler::name(Register dst, Simd128Register src,             \
5447                             Register scratch1, Simd128Register scratch2) { \
5448     mov(scratch1, Operand(1));                                             \
5449     xgr(dst, dst);                                                         \
5450     vx(scratch2, scratch2, scratch2, Condition(0), Condition(0),           \
5451        Condition(2));                                                      \
5452     vceq(scratch2, src, scratch2, Condition(0), Condition(mode));          \
5453     vtm(scratch2, scratch2, Condition(0), Condition(0), Condition(0));     \
5454     locgr(Condition(8), dst, scratch1);                                    \
5455   }
5456 SIMD_ALL_TRUE_LIST(EMIT_SIMD_ALL_TRUE)
5457 #undef EMIT_SIMD_ALL_TRUE
5458 #undef SIMD_ALL_TRUE_LIST
5459 
5460 #define SIMD_QFM_LIST(V) \
5461   V(F64x2Qfma, vfma, 3)  \
5462   V(F64x2Qfms, vfnms, 3) \
5463   V(F32x4Qfma, vfma, 2)  \
5464   V(F32x4Qfms, vfnms, 2)
5465 
5466 #define EMIT_SIMD_QFM(name, op, c1)                                       \
5467   void TurboAssembler::name(Simd128Register dst, Simd128Register src1,    \
5468                             Simd128Register src2, Simd128Register src3) { \
5469     op(dst, src2, src3, src1, Condition(c1), Condition(0));               \
5470   }
5471 SIMD_QFM_LIST(EMIT_SIMD_QFM)
5472 #undef EMIT_SIMD_QFM
5473 #undef SIMD_QFM_LIST
5474 
5475 void TurboAssembler::I64x2Mul(Simd128Register dst, Simd128Register src1,
5476                               Simd128Register src2, Register scratch1,
5477                               Register scratch2, Register scratch3) {
5478   Register scratch_1 = scratch1;
5479   Register scratch_2 = scratch2;
5480   for (int i = 0; i < 2; i++) {
5481     vlgv(scratch_1, src1, MemOperand(r0, i), Condition(3));
5482     vlgv(scratch_2, src2, MemOperand(r0, i), Condition(3));
5483     MulS64(scratch_1, scratch_2);
5484     scratch_1 = scratch2;
5485     scratch_2 = scratch3;
5486   }
5487   vlvgp(dst, scratch1, scratch2);
5488 }
5489 
5490 void TurboAssembler::F64x2Ne(Simd128Register dst, Simd128Register src1,
5491                              Simd128Register src2) {
5492   vfce(dst, src1, src2, Condition(0), Condition(0), Condition(3));
5493   vno(dst, dst, dst, Condition(0), Condition(0), Condition(3));
5494 }
5495 
5496 void TurboAssembler::F64x2Lt(Simd128Register dst, Simd128Register src1,
5497                              Simd128Register src2) {
5498   vfch(dst, src2, src1, Condition(0), Condition(0), Condition(3));
5499 }
5500 
5501 void TurboAssembler::F64x2Le(Simd128Register dst, Simd128Register src1,
5502                              Simd128Register src2) {
5503   vfche(dst, src2, src1, Condition(0), Condition(0), Condition(3));
5504 }
5505 
5506 void TurboAssembler::F32x4Ne(Simd128Register dst, Simd128Register src1,
5507                              Simd128Register src2) {
5508   vfce(dst, src1, src2, Condition(0), Condition(0), Condition(2));
5509   vno(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5510 }
5511 
5512 void TurboAssembler::F32x4Lt(Simd128Register dst, Simd128Register src1,
5513                              Simd128Register src2) {
5514   vfch(dst, src2, src1, Condition(0), Condition(0), Condition(2));
5515 }
5516 
5517 void TurboAssembler::F32x4Le(Simd128Register dst, Simd128Register src1,
5518                              Simd128Register src2) {
5519   vfche(dst, src2, src1, Condition(0), Condition(0), Condition(2));
5520 }
5521 
5522 void TurboAssembler::I64x2Ne(Simd128Register dst, Simd128Register src1,
5523                              Simd128Register src2) {
5524   vceq(dst, src1, src2, Condition(0), Condition(3));
5525   vno(dst, dst, dst, Condition(0), Condition(0), Condition(3));
5526 }
5527 
5528 void TurboAssembler::I64x2GeS(Simd128Register dst, Simd128Register src1,
5529                               Simd128Register src2) {
5530   // Compute !(B > A) which is equal to A >= B.
5531   vch(dst, src2, src1, Condition(0), Condition(3));
5532   vno(dst, dst, dst, Condition(0), Condition(0), Condition(3));
5533 }
5534 
5535 void TurboAssembler::I32x4Ne(Simd128Register dst, Simd128Register src1,
5536                              Simd128Register src2) {
5537   vceq(dst, src1, src2, Condition(0), Condition(2));
5538   vno(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5539 }
5540 
5541 void TurboAssembler::I32x4GeS(Simd128Register dst, Simd128Register src1,
5542                               Simd128Register src2) {
5543   // Compute !(B > A) which is equal to A >= B.
5544   vch(dst, src2, src1, Condition(0), Condition(2));
5545   vno(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5546 }
5547 
5548 void TurboAssembler::I32x4GeU(Simd128Register dst, Simd128Register src1,
5549                               Simd128Register src2, Simd128Register scratch) {
5550   vceq(scratch, src1, src2, Condition(0), Condition(2));
5551   vchl(dst, src1, src2, Condition(0), Condition(2));
5552   vo(dst, dst, scratch, Condition(0), Condition(0), Condition(2));
5553 }
5554 
5555 void TurboAssembler::I16x8Ne(Simd128Register dst, Simd128Register src1,
5556                              Simd128Register src2) {
5557   vceq(dst, src1, src2, Condition(0), Condition(1));
5558   vno(dst, dst, dst, Condition(0), Condition(0), Condition(1));
5559 }
5560 
5561 void TurboAssembler::I16x8GeS(Simd128Register dst, Simd128Register src1,
5562                               Simd128Register src2) {
5563   // Compute !(B > A) which is equal to A >= B.
5564   vch(dst, src2, src1, Condition(0), Condition(1));
5565   vno(dst, dst, dst, Condition(0), Condition(0), Condition(1));
5566 }
5567 
5568 void TurboAssembler::I16x8GeU(Simd128Register dst, Simd128Register src1,
5569                               Simd128Register src2, Simd128Register scratch) {
5570   vceq(scratch, src1, src2, Condition(0), Condition(1));
5571   vchl(dst, src1, src2, Condition(0), Condition(1));
5572   vo(dst, dst, scratch, Condition(0), Condition(0), Condition(1));
5573 }
5574 
5575 void TurboAssembler::I8x16Ne(Simd128Register dst, Simd128Register src1,
5576                              Simd128Register src2) {
5577   vceq(dst, src1, src2, Condition(0), Condition(0));
5578   vno(dst, dst, dst, Condition(0), Condition(0), Condition(0));
5579 }
5580 
5581 void TurboAssembler::I8x16GeS(Simd128Register dst, Simd128Register src1,
5582                               Simd128Register src2) {
5583   // Compute !(B > A) which is equal to A >= B.
5584   vch(dst, src2, src1, Condition(0), Condition(0));
5585   vno(dst, dst, dst, Condition(0), Condition(0), Condition(0));
5586 }
5587 
5588 void TurboAssembler::I8x16GeU(Simd128Register dst, Simd128Register src1,
5589                               Simd128Register src2, Simd128Register scratch) {
5590   vceq(scratch, src1, src2, Condition(0), Condition(0));
5591   vchl(dst, src1, src2, Condition(0), Condition(0));
5592   vo(dst, dst, scratch, Condition(0), Condition(0), Condition(0));
5593 }
5594 
5595 void TurboAssembler::I64x2BitMask(Register dst, Simd128Register src,
5596                                   Register scratch1, Simd128Register scratch2) {
5597   mov(scratch1, Operand(0x8080808080800040));
5598   vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
5599   vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
5600   vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
5601 }
5602 
5603 void TurboAssembler::I32x4BitMask(Register dst, Simd128Register src,
5604                                   Register scratch1, Simd128Register scratch2) {
5605   mov(scratch1, Operand(0x8080808000204060));
5606   vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
5607   vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
5608   vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
5609 }
5610 
5611 void TurboAssembler::I16x8BitMask(Register dst, Simd128Register src,
5612                                   Register scratch1, Simd128Register scratch2) {
5613   mov(scratch1, Operand(0x10203040506070));
5614   vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
5615   vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
5616   vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
5617 }
5618 
5619 void TurboAssembler::F64x2ConvertLowI32x4S(Simd128Register dst,
5620                                            Simd128Register src) {
5621   vupl(dst, src, Condition(0), Condition(0), Condition(2));
5622   vcdg(dst, dst, Condition(4), Condition(0), Condition(3));
5623 }
5624 
5625 void TurboAssembler::F64x2ConvertLowI32x4U(Simd128Register dst,
5626                                            Simd128Register src) {
5627   vupll(dst, src, Condition(0), Condition(0), Condition(2));
5628   vcdlg(dst, dst, Condition(4), Condition(0), Condition(3));
5629 }
5630 
5631 void TurboAssembler::I8x16BitMask(Register dst, Simd128Register src,
5632                                   Register scratch1, Register scratch2,
5633                                   Simd128Register scratch3) {
5634   mov(scratch1, Operand(0x4048505860687078));
5635   mov(scratch2, Operand(0x8101820283038));
5636   vlvgp(scratch3, scratch2, scratch1);
5637   vbperm(scratch3, src, scratch3, Condition(0), Condition(0), Condition(0));
5638   vlgv(dst, scratch3, MemOperand(r0, 3), Condition(1));
5639 }
5640 
5641 void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
5642                                  Register scratch) {
5643   mov(dst, Operand(1));
5644   xgr(scratch, scratch);
5645   vtm(src, src, Condition(0), Condition(0), Condition(0));
5646   locgr(Condition(8), dst, scratch);
5647 }
5648 
5649 #define CONVERT_FLOAT_TO_INT32(convert, dst, src, scratch1, scratch2) \
5650   for (int index = 0; index < 4; index++) {                           \
5651     vlgv(scratch2, src, MemOperand(r0, index), Condition(2));         \
5652     MovIntToFloat(scratch1, scratch2);                                \
5653     convert(scratch2, scratch1, kRoundToZero);                        \
5654     vlvg(dst, scratch2, MemOperand(r0, index), Condition(2));         \
5655   }
5656 void TurboAssembler::I32x4SConvertF32x4(Simd128Register dst,
5657                                         Simd128Register src,
5658                                         Simd128Register scratch1,
5659                                         Register scratch2) {
5660   // NaN to 0.
5661   vfce(scratch1, src, src, Condition(0), Condition(0), Condition(2));
5662   vn(dst, src, scratch1, Condition(0), Condition(0), Condition(0));
5663   if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
5664     vcgd(dst, dst, Condition(5), Condition(0), Condition(2));
5665   } else {
5666     CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32, dst, dst, scratch1, scratch2)
5667   }
5668 }
5669 
5670 void TurboAssembler::I32x4UConvertF32x4(Simd128Register dst,
5671                                         Simd128Register src,
5672                                         Simd128Register scratch1,
5673                                         Register scratch2) {
5674   // vclgd or ConvertFloat32ToUnsignedInt32 will convert NaN to 0, negative to 0
5675   // automatically.
5676   if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
5677     vclgd(dst, src, Condition(5), Condition(0), Condition(2));
5678   } else {
5679     CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32, dst, src, scratch1,
5680                            scratch2)
5681   }
5682 }
5683 #undef CONVERT_FLOAT_TO_INT32
5684 
5685 #define CONVERT_INT32_TO_FLOAT(convert, dst, src, scratch1, scratch2) \
5686   for (int index = 0; index < 4; index++) {                           \
5687     vlgv(scratch2, src, MemOperand(r0, index), Condition(2));         \
5688     convert(scratch1, scratch2);                                      \
5689     MovFloatToInt(scratch2, scratch1);                                \
5690     vlvg(dst, scratch2, MemOperand(r0, index), Condition(2));         \
5691   }
5692 void TurboAssembler::F32x4SConvertI32x4(Simd128Register dst,
5693                                         Simd128Register src,
5694                                         Simd128Register scratch1,
5695                                         Register scratch2) {
5696   if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
5697     vcdg(dst, src, Condition(4), Condition(0), Condition(2));
5698   } else {
5699     CONVERT_INT32_TO_FLOAT(ConvertIntToFloat, dst, src, scratch1, scratch2)
5700   }
5701 }
5702 void TurboAssembler::F32x4UConvertI32x4(Simd128Register dst,
5703                                         Simd128Register src,
5704                                         Simd128Register scratch1,
5705                                         Register scratch2) {
5706   if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
5707     vcdlg(dst, src, Condition(4), Condition(0), Condition(2));
5708   } else {
5709     CONVERT_INT32_TO_FLOAT(ConvertUnsignedIntToFloat, dst, src, scratch1,
5710                            scratch2)
5711   }
5712 }
5713 #undef CONVERT_INT32_TO_FLOAT
5714 
5715 void TurboAssembler::I16x8SConvertI32x4(Simd128Register dst,
5716                                         Simd128Register src1,
5717                                         Simd128Register src2) {
5718   vpks(dst, src2, src1, Condition(0), Condition(2));
5719 }
5720 
5721 void TurboAssembler::I8x16SConvertI16x8(Simd128Register dst,
5722                                         Simd128Register src1,
5723                                         Simd128Register src2) {
5724   vpks(dst, src2, src1, Condition(0), Condition(1));
5725 }
5726 
5727 #define VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, mode)       \
5728   vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), \
5729      Condition(0), Condition(mode));                               \
5730   vmx(scratch, src1, kDoubleRegZero, Condition(0), Condition(0),   \
5731       Condition(mode));                                            \
5732   vmx(dst, src2, kDoubleRegZero, Condition(0), Condition(0), Condition(mode));
5733 void TurboAssembler::I16x8UConvertI32x4(Simd128Register dst,
5734                                         Simd128Register src1,
5735                                         Simd128Register src2,
5736                                         Simd128Register scratch) {
5737   // treat inputs as signed, and saturate to unsigned (negative to 0).
5738   VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, 2)
5739   vpkls(dst, dst, scratch, Condition(0), Condition(2));
5740 }
5741 
5742 void TurboAssembler::I8x16UConvertI16x8(Simd128Register dst,
5743                                         Simd128Register src1,
5744                                         Simd128Register src2,
5745                                         Simd128Register scratch) {
5746   // treat inputs as signed, and saturate to unsigned (negative to 0).
5747   VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, 1)
5748   vpkls(dst, dst, scratch, Condition(0), Condition(1));
5749 }
5750 #undef VECTOR_PACK_UNSIGNED
5751 
5752 #define BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, op, extract_high, \
5753                       extract_low, mode)                                     \
5754   DCHECK(dst != scratch1 && dst != scratch2);                                \
5755   DCHECK(dst != src1 && dst != src2);                                        \
5756   extract_high(scratch1, src1, Condition(0), Condition(0), Condition(mode)); \
5757   extract_high(scratch2, src2, Condition(0), Condition(0), Condition(mode)); \
5758   op(dst, scratch1, scratch2, Condition(0), Condition(0),                    \
5759      Condition(mode + 1));                                                   \
5760   extract_low(scratch1, src1, Condition(0), Condition(0), Condition(mode));  \
5761   extract_low(scratch2, src2, Condition(0), Condition(0), Condition(mode));  \
5762   op(scratch1, scratch1, scratch2, Condition(0), Condition(0),               \
5763      Condition(mode + 1));
5764 void TurboAssembler::I16x8AddSatS(Simd128Register dst, Simd128Register src1,
5765                                   Simd128Register src2,
5766                                   Simd128Register scratch1,
5767                                   Simd128Register scratch2) {
5768   BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuph, vupl, 1)
5769   vpks(dst, dst, scratch1, Condition(0), Condition(2));
5770 }
5771 
5772 void TurboAssembler::I16x8SubSatS(Simd128Register dst, Simd128Register src1,
5773                                   Simd128Register src2,
5774                                   Simd128Register scratch1,
5775                                   Simd128Register scratch2) {
5776   BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuph, vupl, 1)
5777   vpks(dst, dst, scratch1, Condition(0), Condition(2));
5778 }
5779 
5780 void TurboAssembler::I16x8AddSatU(Simd128Register dst, Simd128Register src1,
5781                                   Simd128Register src2,
5782                                   Simd128Register scratch1,
5783                                   Simd128Register scratch2) {
5784   BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuplh, vupll, 1)
5785   vpkls(dst, dst, scratch1, Condition(0), Condition(2));
5786 }
5787 
5788 void TurboAssembler::I16x8SubSatU(Simd128Register dst, Simd128Register src1,
5789                                   Simd128Register src2,
5790                                   Simd128Register scratch1,
5791                                   Simd128Register scratch2) {
5792   BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuplh, vupll, 1)
5793   // negative intermediate values to 0.
5794   vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), Condition(0),
5795      Condition(0));
5796   vmx(dst, kDoubleRegZero, dst, Condition(0), Condition(0), Condition(2));
5797   vmx(scratch1, kDoubleRegZero, scratch1, Condition(0), Condition(0),
5798       Condition(2));
5799   vpkls(dst, dst, scratch1, Condition(0), Condition(2));
5800 }
5801 
5802 void TurboAssembler::I8x16AddSatS(Simd128Register dst, Simd128Register src1,
5803                                   Simd128Register src2,
5804                                   Simd128Register scratch1,
5805                                   Simd128Register scratch2) {
5806   BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuph, vupl, 0)
5807   vpks(dst, dst, scratch1, Condition(0), Condition(1));
5808 }
5809 
5810 void TurboAssembler::I8x16SubSatS(Simd128Register dst, Simd128Register src1,
5811                                   Simd128Register src2,
5812                                   Simd128Register scratch1,
5813                                   Simd128Register scratch2) {
5814   BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuph, vupl, 0)
5815   vpks(dst, dst, scratch1, Condition(0), Condition(1));
5816 }
5817 
5818 void TurboAssembler::I8x16AddSatU(Simd128Register dst, Simd128Register src1,
5819                                   Simd128Register src2,
5820                                   Simd128Register scratch1,
5821                                   Simd128Register scratch2) {
5822   BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuplh, vupll, 0)
5823   vpkls(dst, dst, scratch1, Condition(0), Condition(1));
5824 }
5825 
5826 void TurboAssembler::I8x16SubSatU(Simd128Register dst, Simd128Register src1,
5827                                   Simd128Register src2,
5828                                   Simd128Register scratch1,
5829                                   Simd128Register scratch2) {
5830   BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuplh, vupll, 0)
5831   // negative intermediate values to 0.
5832   vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), Condition(0),
5833      Condition(0));
5834   vmx(dst, kDoubleRegZero, dst, Condition(0), Condition(0), Condition(1));
5835   vmx(scratch1, kDoubleRegZero, scratch1, Condition(0), Condition(0),
5836       Condition(1));
5837   vpkls(dst, dst, scratch1, Condition(0), Condition(1));
5838 }
5839 #undef BINOP_EXTRACT
5840 
5841 void TurboAssembler::F64x2PromoteLowF32x4(Simd128Register dst,
5842                                           Simd128Register src,
5843                                           Simd128Register scratch1,
5844                                           Register scratch2, Register scratch3,
5845                                           Register scratch4) {
5846   Register holder = scratch3;
5847   for (int index = 0; index < 2; ++index) {
5848     vlgv(scratch2, src, MemOperand(scratch2, index + 2), Condition(2));
5849     MovIntToFloat(scratch1, scratch2);
5850     ldebr(scratch1, scratch1);
5851     MovDoubleToInt64(holder, scratch1);
5852     holder = scratch4;
5853   }
5854   vlvgp(dst, scratch3, scratch4);
5855 }
5856 
5857 void TurboAssembler::F32x4DemoteF64x2Zero(Simd128Register dst,
5858                                           Simd128Register src,
5859                                           Simd128Register scratch1,
5860                                           Register scratch2, Register scratch3,
5861                                           Register scratch4) {
5862   Register holder = scratch3;
5863   for (int index = 0; index < 2; ++index) {
5864     vlgv(scratch2, src, MemOperand(r0, index), Condition(3));
5865     MovInt64ToDouble(scratch1, scratch2);
5866     ledbr(scratch1, scratch1);
5867     MovFloatToInt(holder, scratch1);
5868     holder = scratch4;
5869   }
5870   vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5871   vlvg(dst, scratch3, MemOperand(r0, 2), Condition(2));
5872   vlvg(dst, scratch4, MemOperand(r0, 3), Condition(2));
5873 }
5874 
5875 #define EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, lane_size, mul_even, \
5876                          mul_odd)                                           \
5877   CHECK_NE(src, scratch2);                                                  \
5878   vrepi(scratch2, Operand(1), Condition(lane_size));                        \
5879   mul_even(scratch1, src, scratch2, Condition(0), Condition(0),             \
5880            Condition(lane_size));                                           \
5881   mul_odd(scratch2, src, scratch2, Condition(0), Condition(0),              \
5882           Condition(lane_size));                                            \
5883   va(dst, scratch1, scratch2, Condition(0), Condition(0),                   \
5884      Condition(lane_size + 1));
5885 void TurboAssembler::I32x4ExtAddPairwiseI16x8S(Simd128Register dst,
5886                                                Simd128Register src,
5887                                                Simd128Register scratch1,
5888                                                Simd128Register scratch2) {
5889   EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 1, vme, vmo)
5890 }
5891 
5892 void TurboAssembler::I32x4ExtAddPairwiseI16x8U(Simd128Register dst,
5893                                                Simd128Register src,
5894                                                Simd128Register scratch,
5895                                                Simd128Register scratch2) {
5896   vx(scratch, scratch, scratch, Condition(0), Condition(0), Condition(3));
5897   vsum(dst, src, scratch, Condition(0), Condition(0), Condition(1));
5898 }
5899 
5900 void TurboAssembler::I16x8ExtAddPairwiseI8x16S(Simd128Register dst,
5901                                                Simd128Register src,
5902                                                Simd128Register scratch1,
5903                                                Simd128Register scratch2) {
5904   EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 0, vme, vmo)
5905 }
5906 
5907 void TurboAssembler::I16x8ExtAddPairwiseI8x16U(Simd128Register dst,
5908                                                Simd128Register src,
5909                                                Simd128Register scratch1,
5910                                                Simd128Register scratch2) {
5911   EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 0, vmle, vmlo)
5912 }
5913 #undef EXT_ADD_PAIRWISE
5914 
5915 void TurboAssembler::I32x4TruncSatF64x2SZero(Simd128Register dst,
5916                                              Simd128Register src,
5917                                              Simd128Register scratch) {
5918   // NaN to 0.
5919   vlr(scratch, src, Condition(0), Condition(0), Condition(0));
5920   vfce(scratch, scratch, scratch, Condition(0), Condition(0), Condition(3));
5921   vn(scratch, src, scratch, Condition(0), Condition(0), Condition(0));
5922   vcgd(scratch, scratch, Condition(5), Condition(0), Condition(3));
5923   vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5924   vpks(dst, dst, scratch, Condition(0), Condition(3));
5925 }
5926 
5927 void TurboAssembler::I32x4TruncSatF64x2UZero(Simd128Register dst,
5928                                              Simd128Register src,
5929                                              Simd128Register scratch) {
5930   vclgd(scratch, src, Condition(5), Condition(0), Condition(3));
5931   vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5932   vpkls(dst, dst, scratch, Condition(0), Condition(3));
5933 }
5934 
5935 void TurboAssembler::S128Const(Simd128Register dst, uint64_t high, uint64_t low,
5936                                Register scratch1, Register scratch2) {
5937   mov(scratch1, Operand(low));
5938   mov(scratch2, Operand(high));
5939   vlvgp(dst, scratch2, scratch1);
5940 }
5941 
5942 void TurboAssembler::I8x16Swizzle(Simd128Register dst, Simd128Register src1,
5943                                   Simd128Register src2, Register scratch1,
5944                                   Register scratch2, Simd128Register scratch3,
5945                                   Simd128Register scratch4) {
5946   DCHECK(!AreAliased(src1, src2, scratch3, scratch4));
5947   // Saturate the indices to 5 bits. Input indices more than 31 should
5948   // return 0.
5949   vrepi(scratch3, Operand(31), Condition(0));
5950   vmnl(scratch4, src2, scratch3, Condition(0), Condition(0), Condition(0));
5951   // Input needs to be reversed.
5952   vlgv(scratch1, src1, MemOperand(r0, 0), Condition(3));
5953   vlgv(scratch2, src1, MemOperand(r0, 1), Condition(3));
5954   lrvgr(scratch1, scratch1);
5955   lrvgr(scratch2, scratch2);
5956   vlvgp(dst, scratch2, scratch1);
5957   // Clear scratch.
5958   vx(scratch3, scratch3, scratch3, Condition(0), Condition(0), Condition(0));
5959   vperm(dst, dst, scratch3, scratch4, Condition(0), Condition(0));
5960 }
5961 
5962 void TurboAssembler::I8x16Shuffle(Simd128Register dst, Simd128Register src1,
5963                                   Simd128Register src2, uint64_t high,
5964                                   uint64_t low, Register scratch1,
5965                                   Register scratch2, Simd128Register scratch3) {
5966   mov(scratch1, Operand(low));
5967   mov(scratch2, Operand(high));
5968   vlvgp(scratch3, scratch2, scratch1);
5969   vperm(dst, src1, src2, scratch3, Condition(0), Condition(0));
5970 }
5971 
5972 void TurboAssembler::I32x4DotI16x8S(Simd128Register dst, Simd128Register src1,
5973                                     Simd128Register src2,
5974                                     Simd128Register scratch) {
5975   vme(scratch, src1, src2, Condition(0), Condition(0), Condition(1));
5976   vmo(dst, src1, src2, Condition(0), Condition(0), Condition(1));
5977   va(dst, scratch, dst, Condition(0), Condition(0), Condition(2));
5978 }
5979 
5980 #define Q15_MUL_ROAUND(accumulator, src1, src2, const_val, scratch, unpack) \
5981   unpack(scratch, src1, Condition(0), Condition(0), Condition(1));          \
5982   unpack(accumulator, src2, Condition(0), Condition(0), Condition(1));      \
5983   vml(accumulator, scratch, accumulator, Condition(0), Condition(0),        \
5984       Condition(2));                                                        \
5985   va(accumulator, accumulator, const_val, Condition(0), Condition(0),       \
5986      Condition(2));                                                         \
5987   vrepi(scratch, Operand(15), Condition(2));                                \
5988   vesrav(accumulator, accumulator, scratch, Condition(0), Condition(0),     \
5989          Condition(2));
5990 void TurboAssembler::I16x8Q15MulRSatS(Simd128Register dst, Simd128Register src1,
5991                                       Simd128Register src2,
5992                                       Simd128Register scratch1,
5993                                       Simd128Register scratch2,
5994                                       Simd128Register scratch3) {
5995   DCHECK(!AreAliased(src1, src2, scratch1, scratch2, scratch3));
5996   vrepi(scratch1, Operand(0x4000), Condition(2));
5997   Q15_MUL_ROAUND(scratch2, src1, src2, scratch1, scratch3, vupl)
5998   Q15_MUL_ROAUND(dst, src1, src2, scratch1, scratch3, vuph)
5999   vpks(dst, dst, scratch2, Condition(0), Condition(2));
6000 }
6001 #undef Q15_MUL_ROAUND
6002 
6003 // Vector LE Load and Transform instructions.
6004 #ifdef V8_TARGET_BIG_ENDIAN
6005 #define IS_BIG_ENDIAN true
6006 #else
6007 #define IS_BIG_ENDIAN false
6008 #endif
6009 
6010 #define CAN_LOAD_STORE_REVERSE \
6011   IS_BIG_ENDIAN&& CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)
6012 
6013 #define LOAD_SPLAT_LIST(V)       \
6014   V(64x2, vlbrrep, LoadU64LE, 3) \
6015   V(32x4, vlbrrep, LoadU32LE, 2) \
6016   V(16x8, vlbrrep, LoadU16LE, 1) \
6017   V(8x16, vlrep, LoadU8, 0)
6018 
6019 #define LOAD_SPLAT(name, vector_instr, scalar_instr, condition)       \
6020   void TurboAssembler::LoadAndSplat##name##LE(                        \
6021       Simd128Register dst, const MemOperand& mem, Register scratch) { \
6022     if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {          \
6023       vector_instr(dst, mem, Condition(condition));                   \
6024       return;                                                         \
6025     }                                                                 \
6026     scalar_instr(scratch, mem);                                       \
6027     vlvg(dst, scratch, MemOperand(r0, 0), Condition(condition));      \
6028     vrep(dst, dst, Operand(0), Condition(condition));                 \
6029   }
6030 LOAD_SPLAT_LIST(LOAD_SPLAT)
6031 #undef LOAD_SPLAT
6032 #undef LOAD_SPLAT_LIST
6033 
6034 #define LOAD_EXTEND_LIST(V) \
6035   V(32x2U, vuplh, 2)        \
6036   V(32x2S, vuph, 2)         \
6037   V(16x4U, vuplh, 1)        \
6038   V(16x4S, vuph, 1)         \
6039   V(8x8U, vuplh, 0)         \
6040   V(8x8S, vuph, 0)
6041 
6042 #define LOAD_EXTEND(name, unpack_instr, condition)                            \
6043   void TurboAssembler::LoadAndExtend##name##LE(                               \
6044       Simd128Register dst, const MemOperand& mem, Register scratch) {         \
6045     if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {                  \
6046       vlebrg(dst, mem, Condition(0));                                         \
6047     } else {                                                                  \
6048       LoadU64LE(scratch, mem);                                                \
6049       vlvg(dst, scratch, MemOperand(r0, 0), Condition(3));                    \
6050     }                                                                         \
6051     unpack_instr(dst, dst, Condition(0), Condition(0), Condition(condition)); \
6052   }
6053 LOAD_EXTEND_LIST(LOAD_EXTEND)
6054 #undef LOAD_EXTEND
6055 #undef LOAD_EXTEND
6056 
6057 void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem,
6058                                    Register scratch) {
6059   vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
6060   if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {
6061     vlebrf(dst, mem, Condition(3));
6062     return;
6063   }
6064   LoadU32LE(scratch, mem);
6065   vlvg(dst, scratch, MemOperand(r0, 3), Condition(2));
6066 }
6067 
6068 void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem,
6069                                    Register scratch) {
6070   vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
6071   if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {
6072     vlebrg(dst, mem, Condition(1));
6073     return;
6074   }
6075   LoadU64LE(scratch, mem);
6076   vlvg(dst, scratch, MemOperand(r0, 1), Condition(3));
6077 }
6078 
6079 #define LOAD_LANE_LIST(V)     \
6080   V(64, vlebrg, LoadU64LE, 3) \
6081   V(32, vlebrf, LoadU32LE, 2) \
6082   V(16, vlebrh, LoadU16LE, 1) \
6083   V(8, vleb, LoadU8, 0)
6084 
6085 #define LOAD_LANE(name, vector_instr, scalar_instr, condition)             \
6086   void TurboAssembler::LoadLane##name##LE(Simd128Register dst,             \
6087                                           const MemOperand& mem, int lane, \
6088                                           Register scratch) {              \
6089     if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {               \
6090       vector_instr(dst, mem, Condition(lane));                             \
6091       return;                                                              \
6092     }                                                                      \
6093     scalar_instr(scratch, mem);                                            \
6094     vlvg(dst, scratch, MemOperand(r0, lane), Condition(condition));        \
6095   }
6096 LOAD_LANE_LIST(LOAD_LANE)
6097 #undef LOAD_LANE
6098 #undef LOAD_LANE_LIST
6099 
6100 #define STORE_LANE_LIST(V)      \
6101   V(64, vstebrg, StoreU64LE, 3) \
6102   V(32, vstebrf, StoreU32LE, 2) \
6103   V(16, vstebrh, StoreU16LE, 1) \
6104   V(8, vsteb, StoreU8, 0)
6105 
6106 #define STORE_LANE(name, vector_instr, scalar_instr, condition)             \
6107   void TurboAssembler::StoreLane##name##LE(Simd128Register src,             \
6108                                            const MemOperand& mem, int lane, \
6109                                            Register scratch) {              \
6110     if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {                \
6111       vector_instr(src, mem, Condition(lane));                              \
6112       return;                                                               \
6113     }                                                                       \
6114     vlgv(scratch, src, MemOperand(r0, lane), Condition(condition));         \
6115     scalar_instr(scratch, mem);                                             \
6116   }
6117 STORE_LANE_LIST(STORE_LANE)
6118 #undef STORE_LANE
6119 #undef STORE_LANE_LIST
6120 #undef CAN_LOAD_STORE_REVERSE
6121 #undef IS_BIG_ENDIAN
6122 
6123 void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) {
6124   ASM_CODE_COMMENT(this);
6125   DCHECK(root_array_available());
6126   Isolate* isolate = this->isolate();
6127   ExternalReference limit =
6128       kind == StackLimitKind::kRealStackLimit
6129           ? ExternalReference::address_of_real_jslimit(isolate)
6130           : ExternalReference::address_of_jslimit(isolate);
6131   DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));
6132 
6133   intptr_t offset =
6134       TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
6135   CHECK(is_int32(offset));
6136   LoadU64(destination, MemOperand(kRootRegister, offset));
6137 }
6138 
6139 }  // namespace internal
6140 }  // namespace v8
6141 
6142 #endif  // V8_TARGET_ARCH_S390
6143