1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/codegen/macro-assembler.h"
6 #include "src/compiler/backend/instruction-scheduler.h"
7
8 namespace v8 {
9 namespace internal {
10 namespace compiler {
11
// Instruction scheduling is implemented for this target.
bool InstructionScheduler::SchedulerSupported() { return true; }
13
// Classifies a target (RISC-V) opcode for the scheduler: pure computations
// get no flags and may be freely reordered, loads must not move across
// stores, and anything that writes memory or has other side effects acts as
// a scheduling barrier for loads/stores.
int InstructionScheduler::GetTargetInstructionFlags(
    const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    // Pure computational instructions (ALU, FP arithmetic and SIMD ops):
    // no memory access, safe to reorder.
    case kRiscvAbsD:
    case kRiscvAbsS:
    case kRiscvAdd32:
    case kRiscvAddD:
    case kRiscvAddS:
    case kRiscvAnd:
    case kRiscvAnd32:
    case kRiscvAssertEqual:
    case kRiscvBitcastDL:
    case kRiscvBitcastLD:
    case kRiscvBitcastInt32ToFloat32:
    case kRiscvBitcastFloat32ToInt32:
    case kRiscvByteSwap32:
    case kRiscvByteSwap64:
    case kRiscvCeilWD:
    case kRiscvCeilWS:
    case kRiscvClz32:
    case kRiscvCmp:
    case kRiscvCmpZero:
    case kRiscvCmpD:
    case kRiscvCmpS:
    case kRiscvCtz32:
    case kRiscvCvtDL:
    case kRiscvCvtDS:
    case kRiscvCvtDUl:
    case kRiscvCvtDUw:
    case kRiscvCvtDW:
    case kRiscvCvtSD:
    case kRiscvCvtSL:
    case kRiscvCvtSUl:
    case kRiscvCvtSUw:
    case kRiscvCvtSW:
    case kRiscvMulHigh64:
    case kRiscvMulHighU32:
    case kRiscvAdd64:
    case kRiscvAddOvf64:
    case kRiscvClz64:
    case kRiscvCtz64:
    case kRiscvDiv64:
    case kRiscvDivU64:
    case kRiscvZeroExtendWord:
    case kRiscvSignExtendWord:
    case kRiscvDiv32:
    case kRiscvDivD:
    case kRiscvDivS:
    case kRiscvDivU32:
    case kRiscvMod64:
    case kRiscvModU64:
    case kRiscvMul64:
    case kRiscvPopcnt64:
    case kRiscvRor64:
    case kRiscvSar64:
    case kRiscvShl64:
    case kRiscvShr64:
    case kRiscvSub64:
    case kRiscvSubOvf64:
    case kRiscvF64x2Abs:
    case kRiscvF64x2Neg:
    case kRiscvF64x2Sqrt:
    case kRiscvF64x2Add:
    case kRiscvF64x2Sub:
    case kRiscvF64x2Mul:
    case kRiscvF64x2Div:
    case kRiscvF64x2Min:
    case kRiscvF64x2Max:
    case kRiscvF64x2Eq:
    case kRiscvF64x2Ne:
    case kRiscvF64x2Lt:
    case kRiscvF64x2Le:
    case kRiscvF64x2Pmin:
    case kRiscvF64x2Pmax:
    case kRiscvF64x2ConvertLowI32x4S:
    case kRiscvF64x2ConvertLowI32x4U:
    case kRiscvF64x2PromoteLowF32x4:
    case kRiscvF64x2Ceil:
    case kRiscvF64x2Floor:
    case kRiscvF64x2Trunc:
    case kRiscvF64x2NearestInt:
    case kRiscvI64x2Splat:
    case kRiscvI64x2ExtractLane:
    case kRiscvI64x2ReplaceLane:
    case kRiscvI64x2Add:
    case kRiscvI64x2Sub:
    case kRiscvI64x2Mul:
    case kRiscvI64x2Neg:
    case kRiscvI64x2Abs:
    case kRiscvI64x2Shl:
    case kRiscvI64x2ShrS:
    case kRiscvI64x2ShrU:
    case kRiscvI64x2BitMask:
    case kRiscvI64x2GtS:
    case kRiscvI64x2GeS:
    case kRiscvF32x4Abs:
    case kRiscvF32x4Add:
    case kRiscvF32x4Eq:
    case kRiscvF32x4ExtractLane:
    case kRiscvF32x4Lt:
    case kRiscvF32x4Le:
    case kRiscvF32x4Max:
    case kRiscvF32x4Min:
    case kRiscvF32x4Mul:
    case kRiscvF32x4Div:
    case kRiscvF32x4Ne:
    case kRiscvF32x4Neg:
    case kRiscvF32x4Sqrt:
    case kRiscvF32x4RecipApprox:
    case kRiscvF32x4RecipSqrtApprox:
    case kRiscvF64x2Qfma:
    case kRiscvF64x2Qfms:
    case kRiscvF32x4Qfma:
    case kRiscvF32x4Qfms:
    case kRiscvF32x4ReplaceLane:
    case kRiscvF32x4SConvertI32x4:
    case kRiscvF32x4Splat:
    case kRiscvF32x4Sub:
    case kRiscvF32x4UConvertI32x4:
    case kRiscvF32x4Pmin:
    case kRiscvF32x4Pmax:
    case kRiscvF32x4DemoteF64x2Zero:
    case kRiscvF32x4Ceil:
    case kRiscvF32x4Floor:
    case kRiscvF32x4Trunc:
    case kRiscvF32x4NearestInt:
    case kRiscvI64x2Eq:
    case kRiscvI64x2Ne:
    case kRiscvF64x2Splat:
    case kRiscvF64x2ExtractLane:
    case kRiscvF64x2ReplaceLane:
    case kRiscvFloat32Max:
    case kRiscvFloat32Min:
    case kRiscvFloat32RoundDown:
    case kRiscvFloat32RoundTiesEven:
    case kRiscvFloat32RoundTruncate:
    case kRiscvFloat32RoundUp:
    case kRiscvFloat64ExtractLowWord32:
    case kRiscvFloat64ExtractHighWord32:
    case kRiscvFloat64InsertLowWord32:
    case kRiscvFloat64InsertHighWord32:
    case kRiscvFloat64Max:
    case kRiscvFloat64Min:
    case kRiscvFloat64RoundDown:
    case kRiscvFloat64RoundTiesEven:
    case kRiscvFloat64RoundTruncate:
    case kRiscvFloat64RoundUp:
    case kRiscvFloat64SilenceNaN:
    case kRiscvFloorWD:
    case kRiscvFloorWS:
    case kRiscvI64x2SConvertI32x4Low:
    case kRiscvI64x2SConvertI32x4High:
    case kRiscvI64x2UConvertI32x4Low:
    case kRiscvI64x2UConvertI32x4High:
    case kRiscvI16x8Add:
    case kRiscvI16x8AddSatS:
    case kRiscvI16x8AddSatU:
    case kRiscvI16x8Eq:
    case kRiscvI16x8ExtractLaneU:
    case kRiscvI16x8ExtractLaneS:
    case kRiscvI16x8GeS:
    case kRiscvI16x8GeU:
    case kRiscvI16x8GtS:
    case kRiscvI16x8GtU:
    case kRiscvI16x8MaxS:
    case kRiscvI16x8MaxU:
    case kRiscvI16x8MinS:
    case kRiscvI16x8MinU:
    case kRiscvI16x8Mul:
    case kRiscvI16x8Ne:
    case kRiscvI16x8Neg:
    case kRiscvI16x8ReplaceLane:
    case kRiscvI8x16SConvertI16x8:
    case kRiscvI16x8SConvertI32x4:
    case kRiscvI16x8SConvertI8x16High:
    case kRiscvI16x8SConvertI8x16Low:
    case kRiscvI16x8Shl:
    case kRiscvI16x8ShrS:
    case kRiscvI16x8ShrU:
    case kRiscvI32x4TruncSatF64x2SZero:
    case kRiscvI32x4TruncSatF64x2UZero:
    case kRiscvI16x8Splat:
    case kRiscvI16x8Sub:
    case kRiscvI16x8SubSatS:
    case kRiscvI16x8SubSatU:
    case kRiscvI8x16UConvertI16x8:
    case kRiscvI16x8UConvertI32x4:
    case kRiscvI16x8UConvertI8x16High:
    case kRiscvI16x8UConvertI8x16Low:
    case kRiscvI16x8RoundingAverageU:
    case kRiscvI16x8Q15MulRSatS:
    case kRiscvI16x8Abs:
    case kRiscvI16x8BitMask:
    case kRiscvI32x4Add:
    case kRiscvI32x4Eq:
    case kRiscvI32x4ExtractLane:
    case kRiscvI32x4GeS:
    case kRiscvI32x4GeU:
    case kRiscvI32x4GtS:
    case kRiscvI32x4GtU:
    case kRiscvI32x4MaxS:
    case kRiscvI32x4MaxU:
    case kRiscvI32x4MinS:
    case kRiscvI32x4MinU:
    case kRiscvI32x4Mul:
    case kRiscvI32x4Ne:
    case kRiscvI32x4Neg:
    case kRiscvI32x4ReplaceLane:
    case kRiscvI32x4SConvertF32x4:
    case kRiscvI32x4SConvertI16x8High:
    case kRiscvI32x4SConvertI16x8Low:
    case kRiscvI32x4Shl:
    case kRiscvI32x4ShrS:
    case kRiscvI32x4ShrU:
    case kRiscvI32x4Splat:
    case kRiscvI32x4Sub:
    case kRiscvI32x4UConvertF32x4:
    case kRiscvI32x4UConvertI16x8High:
    case kRiscvI32x4UConvertI16x8Low:
    case kRiscvI32x4Abs:
    case kRiscvI32x4BitMask:
    case kRiscvI8x16Add:
    case kRiscvI8x16AddSatS:
    case kRiscvI8x16AddSatU:
    case kRiscvI8x16Eq:
    case kRiscvI8x16ExtractLaneU:
    case kRiscvI8x16ExtractLaneS:
    case kRiscvI8x16GeS:
    case kRiscvI8x16GeU:
    case kRiscvI8x16GtS:
    case kRiscvI8x16GtU:
    case kRiscvI8x16MaxS:
    case kRiscvI8x16MaxU:
    case kRiscvI8x16MinS:
    case kRiscvI8x16MinU:
    case kRiscvI8x16Ne:
    case kRiscvI8x16Neg:
    case kRiscvI8x16ReplaceLane:
    case kRiscvI8x16Shl:
    case kRiscvI8x16ShrS:
    case kRiscvI8x16ShrU:
    case kRiscvI8x16Splat:
    case kRiscvI8x16Sub:
    case kRiscvI8x16SubSatS:
    case kRiscvI8x16SubSatU:
    case kRiscvI8x16RoundingAverageU:
    case kRiscvI8x16Abs:
    case kRiscvI8x16BitMask:
    case kRiscvI8x16Popcnt:
    case kRiscvMaxD:
    case kRiscvMaxS:
    case kRiscvMinD:
    case kRiscvMinS:
    case kRiscvMod32:
    case kRiscvModU32:
    case kRiscvMov:
    case kRiscvMul32:
    case kRiscvMulD:
    case kRiscvMulHigh32:
    case kRiscvMulOvf32:
    case kRiscvMulS:
    case kRiscvNegD:
    case kRiscvNegS:
    case kRiscvNor:
    case kRiscvNor32:
    case kRiscvOr:
    case kRiscvOr32:
    case kRiscvPopcnt32:
    case kRiscvRor32:
    case kRiscvRoundWD:
    case kRiscvRoundWS:
    case kRiscvS128And:
    case kRiscvS128Or:
    case kRiscvS128Not:
    case kRiscvS128Select:
    case kRiscvS128AndNot:
    case kRiscvS128Xor:
    case kRiscvS128Const:
    case kRiscvS128Zero:
    case kRiscvS128Load32Zero:
    case kRiscvS128Load64Zero:
    case kRiscvS128AllOnes:
    case kRiscvS16x8InterleaveEven:
    case kRiscvS16x8InterleaveOdd:
    case kRiscvS16x8InterleaveLeft:
    case kRiscvS16x8InterleaveRight:
    case kRiscvS16x8PackEven:
    case kRiscvS16x8PackOdd:
    case kRiscvS16x2Reverse:
    case kRiscvS16x4Reverse:
    case kRiscvI8x16AllTrue:
    case kRiscvI32x4AllTrue:
    case kRiscvI16x8AllTrue:
    case kRiscvV128AnyTrue:
    case kRiscvI64x2AllTrue:
    case kRiscvS32x4InterleaveEven:
    case kRiscvS32x4InterleaveOdd:
    case kRiscvS32x4InterleaveLeft:
    case kRiscvS32x4InterleaveRight:
    case kRiscvS32x4PackEven:
    case kRiscvS32x4PackOdd:
    case kRiscvS32x4Shuffle:
    case kRiscvS8x16Concat:
    case kRiscvS8x16InterleaveEven:
    case kRiscvS8x16InterleaveOdd:
    case kRiscvS8x16InterleaveLeft:
    case kRiscvS8x16InterleaveRight:
    case kRiscvS8x16PackEven:
    case kRiscvS8x16PackOdd:
    case kRiscvS8x2Reverse:
    case kRiscvS8x4Reverse:
    case kRiscvS8x8Reverse:
    case kRiscvI8x16Shuffle:
    case kRiscvVwmul:
    case kRiscvVwmulu:
    case kRiscvVmvSx:
    case kRiscvVcompress:
    case kRiscvVaddVv:
    case kRiscvVwadd:
    case kRiscvVwaddu:
    case kRiscvVrgather:
    case kRiscvVslidedown:
    case kRiscvSar32:
    case kRiscvSignExtendByte:
    case kRiscvSignExtendShort:
    case kRiscvShl32:
    case kRiscvShr32:
    case kRiscvSqrtD:
    case kRiscvSqrtS:
    case kRiscvSub32:
    case kRiscvSubD:
    case kRiscvSubS:
    case kRiscvTruncLD:
    case kRiscvTruncLS:
    case kRiscvTruncUlD:
    case kRiscvTruncUlS:
    case kRiscvTruncUwD:
    case kRiscvTruncUwS:
    case kRiscvTruncWD:
    case kRiscvTruncWS:
    case kRiscvTst:
    case kRiscvXor:
    case kRiscvXor32:
      return kNoOpcodeFlags;

    // Memory reads (aligned, unaligned, SIMD, atomic and decompressing
    // loads): may be reordered with each other but not across stores.
    case kRiscvLb:
    case kRiscvLbu:
    case kRiscvLd:
    case kRiscvLoadDouble:
    case kRiscvLh:
    case kRiscvLhu:
    case kRiscvLw:
    case kRiscvLoadFloat:
    case kRiscvLwu:
    case kRiscvRvvLd:
    case kRiscvPeek:
    case kRiscvUld:
    case kRiscvULoadDouble:
    case kRiscvUlh:
    case kRiscvUlhu:
    case kRiscvUlw:
    case kRiscvUlwu:
    case kRiscvULoadFloat:
    case kRiscvS128LoadSplat:
    case kRiscvS128Load64ExtendU:
    case kRiscvS128Load64ExtendS:
    case kRiscvS128LoadLane:
    case kRiscvWord64AtomicLoadUint64:
    case kRiscvLoadDecompressTaggedSigned:
    case kRiscvLoadDecompressTaggedPointer:
    case kRiscvLoadDecompressAnyTagged:
      return kIsLoadOperation;

    // Instructions with side effects: stores, stack manipulation, fences
    // and atomic read-modify-writes. kRiscvModD/ModS are here as well
    // (presumably because they are implemented via a runtime call —
    // TODO(review): confirm against the code generator).
    case kRiscvModD:
    case kRiscvModS:
    case kRiscvRvvSt:
    case kRiscvPush:
    case kRiscvSb:
    case kRiscvSd:
    case kRiscvStoreDouble:
    case kRiscvSh:
    case kRiscvStackClaim:
    case kRiscvStoreToStackSlot:
    case kRiscvSw:
    case kRiscvStoreFloat:
    case kRiscvUsd:
    case kRiscvUStoreDouble:
    case kRiscvUsh:
    case kRiscvUsw:
    case kRiscvUStoreFloat:
    case kRiscvSync:
    case kRiscvWord64AtomicStoreWord64:
    case kRiscvWord64AtomicAddUint64:
    case kRiscvWord64AtomicSubUint64:
    case kRiscvWord64AtomicAndUint64:
    case kRiscvWord64AtomicOrUint64:
    case kRiscvWord64AtomicXorUint64:
    case kRiscvWord64AtomicExchangeUint64:
    case kRiscvWord64AtomicCompareExchangeUint64:
    case kRiscvStoreCompressTagged:
    case kRiscvS128StoreLane:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
      COMMON_ARCH_OPCODE_LIST(CASE)
#undef CASE
      // Already covered in architecture independent code.
      UNREACHABLE();
  }

  UNREACHABLE();
}
426
// Estimated instruction latencies (in cycles) used by the latency helpers
// below. Per the TODOs in this file these numbers are inherited from the
// MIPS backend and have not been tuned for RISC-V hardware.
enum Latency {
  BRANCH = 4,  // Estimated max.
  RINT_S = 4,  // Estimated.
  RINT_D = 4,  // Estimated.

  // TODO(RISCV): remove MULT instructions (MIPS legacy).
  MULT = 4,
  MULTU = 4,
  DMULT = 4,

  MUL32 = 7,

  DIV32 = 50,  // Min:11 Max:50
  DIV64 = 50,
  DIVU32 = 50,
  DIVU64 = 50,

  ABS_S = 4,
  ABS_D = 4,
  NEG_S = 4,
  NEG_D = 4,
  ADD_S = 4,
  ADD_D = 4,
  SUB_S = 4,
  SUB_D = 4,
  MAX_S = 4,  // Estimated.
  MIN_S = 4,
  MAX_D = 4,  // Estimated.
  MIN_D = 4,
  C_cond_S = 4,
  C_cond_D = 4,
  MUL_S = 4,

  MADD_S = 4,
  MSUB_S = 4,
  NMADD_S = 4,
  NMSUB_S = 4,

  CABS_cond_S = 4,
  CABS_cond_D = 4,

  // Floating-point conversion latencies (source/destination encoded in the
  // MIPS-style name, e.g. CVT_D_S = convert single to double).
  CVT_D_S = 4,
  CVT_PS_PW = 4,

  CVT_S_W = 4,
  CVT_S_L = 4,
  CVT_D_W = 4,
  CVT_D_L = 4,

  CVT_S_D = 4,

  CVT_W_S = 4,
  CVT_W_D = 4,
  CVT_L_S = 4,
  CVT_L_D = 4,

  CEIL_W_S = 4,
  CEIL_W_D = 4,
  CEIL_L_S = 4,
  CEIL_L_D = 4,

  FLOOR_W_S = 4,
  FLOOR_W_D = 4,
  FLOOR_L_S = 4,
  FLOOR_L_D = 4,

  ROUND_W_S = 4,
  ROUND_W_D = 4,
  ROUND_L_S = 4,
  ROUND_L_D = 4,

  TRUNC_W_S = 4,
  TRUNC_W_D = 4,
  TRUNC_L_S = 4,
  TRUNC_L_D = 4,

  MOV_S = 4,
  MOV_D = 4,

  MOVF_S = 4,
  MOVF_D = 4,

  MOVN_S = 4,
  MOVN_D = 4,

  MOVT_S = 4,
  MOVT_D = 4,

  MOVZ_S = 4,
  MOVZ_D = 4,

  MUL_D = 5,
  MADD_D = 5,
  MSUB_D = 5,
  NMADD_D = 5,
  NMSUB_D = 5,

  RECIP_S = 13,
  RECIP_D = 26,

  RSQRT_S = 17,
  RSQRT_D = 36,

  DIV_S = 17,
  SQRT_S = 17,

  DIV_D = 32,
  SQRT_D = 32,

  // GPR <-> FPR transfers and memory accesses.
  MOVT_FREG = 4,
  MOVT_HIGH_FREG = 4,
  MOVT_DREG = 4,
  LOAD_FLOAT = 4,
  LOAD_DOUBLE = 4,

  MOVF_FREG = 1,
  MOVF_HIGH_FREG = 1,
  MOVF_HIGH_DREG = 1,
  MOVF_HIGH = 1,
  MOVF_LOW = 1,
  STORE_FLOAT = 1,
  STORE_DOUBLE = 1,
};
550
// Latency of a 64-bit add: 1 cycle with a register operand, 2 when an
// immediate operand must be materialized first (estimated max).
int Add64Latency(bool is_operand_register = true) {
  if (is_operand_register) {
    return 1;
  } else {
    return 2;  // Estimated max.
  }
}

// The remaining single-ALU-op helpers all have the same cost model and
// delegate to Add64Latency().
int Sub64Latency(bool is_operand_register = true) {
  return Add64Latency(is_operand_register);
}

int AndLatency(bool is_operand_register = true) {
  return Add64Latency(is_operand_register);
}

int OrLatency(bool is_operand_register = true) {
  return Add64Latency(is_operand_register);
}

// Consistency fix: the original duplicated Add64Latency()'s body here;
// delegate like the sibling helpers (identical result).
int NorLatency(bool is_operand_register = true) {
  return Add64Latency(is_operand_register);
}

int XorLatency(bool is_operand_register = true) {
  return Add64Latency(is_operand_register);
}
582
Mul32Latency(bool is_operand_register=true)583 int Mul32Latency(bool is_operand_register = true) {
584 if (is_operand_register) {
585 return Latency::MUL32;
586 } else {
587 return Latency::MUL32 + 1;
588 }
589 }
590
Mul64Latency(bool is_operand_register=true)591 int Mul64Latency(bool is_operand_register = true) {
592 int latency = Latency::DMULT + Latency::MOVF_LOW;
593 if (!is_operand_register) {
594 latency += 1;
595 }
596 return latency;
597 }
598
Mulh32Latency(bool is_operand_register=true)599 int Mulh32Latency(bool is_operand_register = true) {
600 int latency = Latency::MULT + Latency::MOVF_HIGH;
601 if (!is_operand_register) {
602 latency += 1;
603 }
604 return latency;
605 }
606
Mulhu32Latency(bool is_operand_register=true)607 int Mulhu32Latency(bool is_operand_register = true) {
608 int latency = Latency::MULTU + Latency::MOVF_HIGH;
609 if (!is_operand_register) {
610 latency += 1;
611 }
612 return latency;
613 }
614
Mulh64Latency(bool is_operand_register=true)615 int Mulh64Latency(bool is_operand_register = true) {
616 int latency = Latency::DMULT + Latency::MOVF_HIGH;
617 if (!is_operand_register) {
618 latency += 1;
619 }
620 return latency;
621 }
622
Div32Latency(bool is_operand_register=true)623 int Div32Latency(bool is_operand_register = true) {
624 if (is_operand_register) {
625 return Latency::DIV32;
626 } else {
627 return Latency::DIV32 + 1;
628 }
629 }
630
Divu32Latency(bool is_operand_register=true)631 int Divu32Latency(bool is_operand_register = true) {
632 if (is_operand_register) {
633 return Latency::DIVU32;
634 } else {
635 return Latency::DIVU32 + 1;
636 }
637 }
638
Div64Latency(bool is_operand_register=true)639 int Div64Latency(bool is_operand_register = true) {
640 int latency = Latency::DIV64 + Latency::MOVF_LOW;
641 if (!is_operand_register) {
642 latency += 1;
643 }
644 return latency;
645 }
646
Divu64Latency(bool is_operand_register=true)647 int Divu64Latency(bool is_operand_register = true) {
648 int latency = Latency::DIVU64 + Latency::MOVF_LOW;
649 if (!is_operand_register) {
650 latency += 1;
651 }
652 return latency;
653 }
654
Mod32Latency(bool is_operand_register=true)655 int Mod32Latency(bool is_operand_register = true) {
656 int latency = Latency::DIV32 + Latency::MOVF_HIGH;
657 if (!is_operand_register) {
658 latency += 1;
659 }
660 return latency;
661 }
662
Modu32Latency(bool is_operand_register=true)663 int Modu32Latency(bool is_operand_register = true) {
664 int latency = Latency::DIVU32 + Latency::MOVF_HIGH;
665 if (!is_operand_register) {
666 latency += 1;
667 }
668 return latency;
669 }
670
Mod64Latency(bool is_operand_register=true)671 int Mod64Latency(bool is_operand_register = true) {
672 int latency = Latency::DIV64 + Latency::MOVF_HIGH;
673 if (!is_operand_register) {
674 latency += 1;
675 }
676 return latency;
677 }
678
Modu64Latency(bool is_operand_register=true)679 int Modu64Latency(bool is_operand_register = true) {
680 int latency = Latency::DIV64 + Latency::MOVF_HIGH;
681 if (!is_operand_register) {
682 latency += 1;
683 }
684 return latency;
685 }
686
// Conditional-move latencies (single cycle each).
int MovzLatency() { return 1; }

int MovnLatency() { return 1; }

// Latency of a call sequence: target address setup plus the branch itself.
int CallLatency() {
  // Estimated.
  return Add64Latency(false) + Latency::BRANCH + 5;
}

// Latency of an (indirect) jump sequence.
int JumpLatency() {
  // Estimated max.
  return 1 + Add64Latency() + Latency::BRANCH + 2;
}
700
int SmiUntagLatency() { return 1; }

// Cost of the tail-call frame shuffling sequence.
int PrepareForTailCallLatency() {
  // Estimated max.
  return 2 * (Add64Latency() + 1 + Add64Latency(false)) + 2 + Latency::BRANCH +
         Latency::BRANCH + 2 * Sub64Latency(false) + 2 + Latency::BRANCH + 1;
}

int AssemblePopArgumentsAdoptFrameLatency() {
  return 1 + Latency::BRANCH + 1 + SmiUntagLatency() +
         PrepareForTailCallLatency();
}

int AssertLatency() { return 1; }

// Stack setup before a C call; aligning the stack costs extra instructions
// when the platform's frame alignment exceeds one pointer.
int PrepareCallCFunctionLatency() {
  int frame_alignment = TurboAssembler::ActivationFrameAlignment();
  if (frame_alignment > kSystemPointerSize) {
    return 1 + Sub64Latency(false) + AndLatency(false) + 1;
  } else {
    return Sub64Latency(false);
  }
}

// Cost of folding a (base, offset) pair into an addressable form.
int AdjustBaseAndOffsetLatency() {
  return 3;  // Estimated max.
}
728
// Aligned access: address adjustment plus one memory instruction.
int AlignedMemoryLatency() { return AdjustBaseAndOffsetLatency() + 1; }

// Unaligned ("U"-prefixed) accesses are modeled as multiple aligned
// accesses plus merge instructions.
int UlhuLatency() {
  return AdjustBaseAndOffsetLatency() + 2 * AlignedMemoryLatency() + 2;
}

int UlwLatency() {
  // Estimated max.
  return AdjustBaseAndOffsetLatency() + 3;
}

int UlwuLatency() { return UlwLatency() + 1; }

int UldLatency() {
  // Estimated max.
  return AdjustBaseAndOffsetLatency() + 3;
}

// Unaligned FP loads: integer load plus a GPR->FPR transfer.
int ULoadFloatLatency() { return UlwLatency() + Latency::MOVT_FREG; }

int ULoadDoubleLatency() { return UldLatency() + Latency::MOVT_DREG; }

int UshLatency() {
  // Estimated max.
  return AdjustBaseAndOffsetLatency() + 2 + 2 * AlignedMemoryLatency();
}

int UswLatency() { return AdjustBaseAndOffsetLatency() + 2; }

int UsdLatency() { return AdjustBaseAndOffsetLatency() + 2; }

// Unaligned FP stores: FPR->GPR transfer plus an integer store.
int UStoreFloatLatency() { return Latency::MOVF_FREG + UswLatency(); }

int UStoreDoubleLatency() { return Latency::MOVF_HIGH_DREG + UsdLatency(); }

// Aligned FP memory accesses.
int LoadFloatLatency() {
  return AdjustBaseAndOffsetLatency() + Latency::LOAD_FLOAT;
}

int StoreFloatLatency() {
  return AdjustBaseAndOffsetLatency() + Latency::STORE_FLOAT;
}

int StoreDoubleLatency() {
  return AdjustBaseAndOffsetLatency() + Latency::STORE_DOUBLE;
}

int LoadDoubleLatency() {
  return AdjustBaseAndOffsetLatency() + Latency::LOAD_DOUBLE;
}
779
MultiPushLatency()780 int MultiPushLatency() {
781 int latency = Sub64Latency(false);
782 for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
783 latency++;
784 }
785 return latency;
786 }
787
MultiPushFPULatency()788 int MultiPushFPULatency() {
789 int latency = Sub64Latency(false);
790 for (int16_t i = kNumRegisters - 1; i >= 0; i--) {
791 latency += StoreDoubleLatency();
792 }
793 return latency;
794 }
795
PushCallerSavedLatency(SaveFPRegsMode fp_mode)796 int PushCallerSavedLatency(SaveFPRegsMode fp_mode) {
797 int latency = MultiPushLatency();
798 if (fp_mode == SaveFPRegsMode::kSave) {
799 latency += MultiPushFPULatency();
800 }
801 return latency;
802 }
803
MultiPopLatency()804 int MultiPopLatency() {
805 int latency = Add64Latency(false);
806 for (int16_t i = 0; i < kNumRegisters; i++) {
807 latency++;
808 }
809 return latency;
810 }
811
MultiPopFPULatency()812 int MultiPopFPULatency() {
813 int latency = Add64Latency(false);
814 for (int16_t i = 0; i < kNumRegisters; i++) {
815 latency += LoadDoubleLatency();
816 }
817 return latency;
818 }
819
PopCallerSavedLatency(SaveFPRegsMode fp_mode)820 int PopCallerSavedLatency(SaveFPRegsMode fp_mode) {
821 int latency = MultiPopLatency();
822 if (fp_mode == SaveFPRegsMode::kSave) {
823 latency += MultiPopFPULatency();
824 }
825 return latency;
826 }
827
// C-call sequence: alignment check, branch, call; the frame-restore cost
// depends on the platform's activation frame alignment.
int CallCFunctionHelperLatency() {
  // Estimated.
  int latency = AndLatency(false) + Latency::BRANCH + 2 + CallLatency();
  if (base::OS::ActivationFrameAlignment() > kSystemPointerSize) {
    latency++;
  } else {
    latency += Add64Latency(false);
  }
  return latency;
}

int CallCFunctionLatency() { return 1 + CallCFunctionHelperLatency(); }

int AssembleArchJumpLatency() {
  // Estimated max.
  return Latency::BRANCH;
}

// Jump-table dispatch code emitted for a switch.
int GenerateSwitchTableLatency() {
  int latency = 6;
  latency += 2;
  return latency;
}

int AssembleArchTableSwitchLatency() {
  return Latency::BRANCH + GenerateSwitchTableLatency();
}
855
// Dropping stack arguments and returning.
int DropAndRetLatency() {
  // Estimated max.
  return Add64Latency(false) + JumpLatency();
}

// Full epilogue: restore registers, tear down the frame, return.
int AssemblerReturnLatency() {
  // Estimated max.
  return Add64Latency(false) + MultiPopLatency() + MultiPopFPULatency() +
         Latency::BRANCH + Add64Latency() + 1 + DropAndRetLatency();
}

// Inline fast path of double->int truncation (trunc + overflow check).
int TryInlineTruncateDoubleToILatency() {
  return 2 + Latency::TRUNC_W_D + Latency::MOVF_FREG + 2 + AndLatency(false) +
         Latency::BRANCH;
}

int CallStubDelayedLatency() { return 1 + CallLatency(); }

int TruncateDoubleToIDelayedLatency() {
  // TODO(riscv): This no longer reflects how TruncateDoubleToI is called.
  return TryInlineTruncateDoubleToILatency() + 1 + Sub64Latency(false) +
         StoreDoubleLatency() + CallStubDelayedLatency() + Add64Latency(false) +
         1;
}

// Write-barrier page-flag test: mask the address, load the flags, test.
int CheckPageFlagLatency() {
  return AndLatency(false) + AlignedMemoryLatency() + AndLatency(false) +
         Latency::BRANCH;
}
885
// Set-less-than-unsigned: single cycle with a register operand, one extra
// cycle (estimated max) when an immediate must be materialized.
int SltuLatency(bool is_operand_register = true) {
  return is_operand_register ? 1 : 2;
}

// Short branch: compare plus the branch pair.
int BranchShortHelperLatency() {
  return SltuLatency() + 2;  // Estimated max.
}

int BranchShortLatency() { return BranchShortHelperLatency(); }

// Register-to-register moves are a single cycle.
int MoveLatency() { return 1; }

// Two moves to set up a pair of float parameters.
int MovToFloatParametersLatency() { return 2 * MoveLatency(); }

// One move to fetch a float result.
int MovFromFloatResultLatency() { return MoveLatency(); }
905
// Add-with-overflow-check sequence.
int AddOverflow64Latency() {
  // Estimated max.
  return 6;
}

int SubOverflow64Latency() {
  // Estimated max.
  return 6;
}

// 32-bit multiply with overflow check: low and high products plus compare.
int MulOverflow32Latency() {
  // Estimated max.
  return Mul32Latency() + Mulh32Latency() + 2;
}

// TODO(RISCV): This is incorrect for RISC-V.
int Clz64Latency() { return 1; }

// Count-trailing-zeros emulated via isolate-lowest-bit + clz.
int Ctz32Latency() {
  return Add64Latency(false) + XorLatency() + AndLatency() + Clz64Latency() +
         1 + Sub64Latency();
}

int Ctz64Latency() {
  return Add64Latency(false) + XorLatency() + AndLatency() + 1 + Sub64Latency();
}

// Bit-twiddling popcount sequence (shift/mask/add tree plus a multiply).
int Popcnt32Latency() {
  return 2 + AndLatency() + Sub64Latency() + 1 + AndLatency() + 1 +
         AndLatency() + Add64Latency() + 1 + Add64Latency() + 1 + AndLatency() +
         1 + Mul32Latency() + 1;
}

int Popcnt64Latency() {
  return 2 + AndLatency() + Sub64Latency() + 1 + AndLatency() + 1 +
         AndLatency() + Add64Latency() + 1 + Add64Latency() + 1 + AndLatency() +
         1 + Mul64Latency() + 1;
}
944
// Floating-point compare latencies (single and double share one estimate).
int CompareFLatency() { return Latency::C_cond_S; }

int CompareF32Latency() { return CompareFLatency(); }

int CompareF64Latency() { return CompareFLatency(); }

// NaN checks are modeled as ordinary FP compares.
int CompareIsNanFLatency() { return CompareFLatency(); }

int CompareIsNanF32Latency() { return CompareIsNanFLatency(); }

int CompareIsNanF64Latency() { return CompareIsNanFLatency(); }

// NaN-aware float negation: NaN check, branches, negate, sign-bit flip via
// the integer pipeline.
int NegsLatency() {
  // Estimated.
  return CompareIsNanF32Latency() + 2 * Latency::BRANCH + Latency::NEG_S +
         Latency::MOVF_FREG + 1 + XorLatency() + Latency::MOVT_FREG;
}

int NegdLatency() {
  // Estimated.
  return CompareIsNanF64Latency() + 2 * Latency::BRANCH + Latency::NEG_D +
         Latency::MOVF_HIGH_DREG + 1 + XorLatency() + Latency::MOVT_DREG;
}
968
Float64RoundLatency()969 int Float64RoundLatency() {
970 // For ceil_l_d, floor_l_d, round_l_d, trunc_l_d latency is 4.
971 return Latency::MOVF_HIGH_DREG + 1 + Latency::BRANCH + Latency::MOV_D + 4 +
972 Latency::MOVF_HIGH_DREG + Latency::BRANCH + Latency::CVT_D_L + 2 +
973 Latency::MOVT_HIGH_FREG;
974 }
975
Float32RoundLatency()976 int Float32RoundLatency() {
977 // For ceil_w_s, floor_w_s, round_w_s, trunc_w_s latency is 4.
978 return Latency::MOVF_FREG + 1 + Latency::BRANCH + Latency::MOV_S + 4 +
979 Latency::MOVF_FREG + Latency::BRANCH + Latency::CVT_S_W + 2 +
980 Latency::MOVT_FREG;
981 }
982
Float32MaxLatency()983 int Float32MaxLatency() {
984 // Estimated max.
985 int latency = CompareIsNanF32Latency() + Latency::BRANCH;
986 return latency + 5 * Latency::BRANCH + 2 * CompareF32Latency() +
987 Latency::MOVF_FREG + 1 + Latency::MOV_S;
988 }
989
Float64MaxLatency()990 int Float64MaxLatency() {
991 // Estimated max.
992 int latency = CompareIsNanF64Latency() + Latency::BRANCH;
993 return latency + 5 * Latency::BRANCH + 2 * CompareF64Latency() +
994 Latency::MOVF_HIGH_DREG + Latency::MOV_D;
995 }
996
Float32MinLatency()997 int Float32MinLatency() {
998 // Estimated max.
999 int latency = CompareIsNanF32Latency() + Latency::BRANCH;
1000 return latency + 5 * Latency::BRANCH + 2 * CompareF32Latency() +
1001 Latency::MOVF_FREG + 1 + Latency::MOV_S;
1002 }
1003
Float64MinLatency()1004 int Float64MinLatency() {
1005 // Estimated max.
1006 int latency = CompareIsNanF64Latency() + Latency::BRANCH;
1007 return latency + 5 * Latency::BRANCH + 2 * CompareF32Latency() +
1008 Latency::MOVF_HIGH_DREG + Latency::MOV_D;
1009 }
1010
// float -> int64 truncation; optionally also materializes a success flag.
int TruncLSLatency(bool load_status) {
  int latency = Latency::TRUNC_L_S + Latency::MOVF_HIGH_DREG;
  if (load_status) {
    latency += SltuLatency() + 7;
  }
  return latency;
}

// double -> int64 truncation; optionally also materializes a success flag.
int TruncLDLatency(bool load_status) {
  int latency = Latency::TRUNC_L_D + Latency::MOVF_HIGH_DREG;
  if (load_status) {
    latency += SltuLatency() + 7;
  }
  return latency;
}

// float -> uint64 truncation: range compares, NaN check and two signed
// truncations with fix-up.
int TruncUlSLatency() {
  // Estimated max.
  return 2 * CompareF32Latency() + CompareIsNanF32Latency() +
         4 * Latency::BRANCH + Latency::SUB_S + 2 * Latency::TRUNC_L_S +
         3 * Latency::MOVF_HIGH_DREG + OrLatency() + Latency::MOVT_FREG +
         Latency::MOV_S + SltuLatency() + 4;
}

// double -> uint64 truncation (mirror of TruncUlSLatency).
int TruncUlDLatency() {
  // Estimated max.
  return 2 * CompareF64Latency() + CompareIsNanF64Latency() +
         4 * Latency::BRANCH + Latency::SUB_D + 2 * Latency::TRUNC_L_D +
         3 * Latency::MOVF_HIGH_DREG + OrLatency() + Latency::MOVT_DREG +
         Latency::MOV_D + SltuLatency() + 4;
}
1042
// Push one value: adjust the stack pointer, then store.
int PushLatency() { return Add64Latency() + AlignedMemoryLatency(); }

int ByteSwapSignedLatency() { return 2; }

// Load-reserved: one instruction when the offset fits in 12 bits, otherwise
// the offset must be materialized first.
int LlLatency(int offset) {
  bool is_one_instruction = is_int12(offset);
  if (is_one_instruction) {
    return 1;
  } else {
    return 3;
  }
}

// Extracting an 8/16/32-bit field from a word; sign extension costs one
// extra instruction. Other sizes are not expected here.
int ExtractBitsLatency(bool sign_extend, int size) {
  int latency = 2;
  if (sign_extend) {
    switch (size) {
      case 8:
      case 16:
      case 32:
        latency += 1;
        break;
      default:
        UNREACHABLE();
    }
  }
  return latency;
}

// Inserting a bit field into a word (mask build + merge).
int InsertBitsLatency() { return 2 + Sub64Latency(false) + 2; }

// Store-conditional.
int ScLatency(int offset) { return 3; }

// Sub-word atomic exchange: LR/SC loop with field extract/insert plus the
// retry branch.
int Word32AtomicExchangeLatency(bool sign_extend, int size) {
  return Add64Latency(false) + 1 + Sub64Latency() + 2 + LlLatency(0) +
         ExtractBitsLatency(sign_extend, size) + InsertBitsLatency() +
         ScLatency(0) + BranchShortLatency() + 1;
}

// Sub-word atomic compare-exchange (same LR/SC skeleton as exchange).
int Word32AtomicCompareExchangeLatency(bool sign_extend, int size) {
  return 2 + Sub64Latency() + 2 + LlLatency(0) +
         ExtractBitsLatency(sign_extend, size) + InsertBitsLatency() +
         ScLatency(0) + BranchShortLatency() + 1;
}
1087
// Returns an estimated issue-to-result latency (in cycles) for |instr|,
// used by the scheduler to order instructions. Values are sums of the
// per-primitive *Latency() helpers defined earlier in this file; opcodes
// not listed fall through to a default latency of 1.
int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
  // TODO(RISCV): Verify these latencies for RISC-V (currently using MIPS
  // numbers).
  switch (instr->arch_opcode()) {
    // Calls, tail calls and frame bookkeeping.
    case kArchCallCodeObject:
    case kArchCallWasmFunction:
      return CallLatency();
    case kArchTailCallCodeObject:
    case kArchTailCallWasm:
    case kArchTailCallAddress:
      return JumpLatency();
    case kArchCallJSFunction: {
      int latency = 0;
      // With --debug-code the call site also emits an assertion sequence.
      if (FLAG_debug_code) {
        latency = 1 + AssertLatency();
      }
      return latency + 1 + Add64Latency(false) + CallLatency();
    }
    case kArchPrepareCallCFunction:
      return PrepareCallCFunctionLatency();
    case kArchSaveCallerRegisters: {
      // The FP-save mode is encoded in the instruction's misc field.
      auto fp_mode =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      return PushCallerSavedLatency(fp_mode);
    }
    case kArchRestoreCallerRegisters: {
      auto fp_mode =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      return PopCallerSavedLatency(fp_mode);
    }
    case kArchPrepareTailCall:
      return 2;
    case kArchCallCFunction:
      return CallCFunctionLatency();
    case kArchJmp:
      return AssembleArchJumpLatency();
    case kArchTableSwitch:
      return AssembleArchTableSwitchLatency();
    case kArchAbortCSADcheck:
      return CallLatency() + 1;
    case kArchDebugBreak:
      return 1;
    // Pseudo-instructions that emit no machine code.
    case kArchComment:
    case kArchNop:
    case kArchThrowTerminator:
    case kArchDeoptimize:
      return 0;
    case kArchRet:
      return AssemblerReturnLatency();
    case kArchFramePointer:
      return 1;
    case kArchParentFramePointer:
      // Estimated max.
      return AlignedMemoryLatency();
    case kArchTruncateDoubleToI:
      return TruncateDoubleToIDelayedLatency();
    case kArchStoreWithWriteBarrier:
      return Add64Latency() + 1 + CheckPageFlagLatency();
    case kArchStackSlot:
      // Estimated max.
      return Add64Latency(false) + AndLatency(false) + AssertLatency() +
             Add64Latency(false) + AndLatency(false) + BranchShortLatency() +
             1 + Sub64Latency() + Add64Latency();
    // IEEE-754 math opcodes are lowered to C library calls, so their cost
    // is the C-call overhead plus float parameter/result marshalling.
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() +
             CallCFunctionLatency() + MovFromFloatResultLatency();
    // Integer arithmetic. Several helpers take a bool telling whether the
    // second operand is a register (immediate operands may need extra
    // materialization).
    case kRiscvAdd32:
    case kRiscvAdd64:
      return Add64Latency(instr->InputAt(1)->IsRegister());
    case kRiscvAddOvf64:
      return AddOverflow64Latency();
    case kRiscvSub32:
    case kRiscvSub64:
      return Sub64Latency(instr->InputAt(1)->IsRegister());
    case kRiscvSubOvf64:
      return SubOverflow64Latency();
    case kRiscvMul32:
      return Mul32Latency();
    case kRiscvMulOvf32:
      return MulOverflow32Latency();
    case kRiscvMulHigh32:
      return Mulh32Latency();
    case kRiscvMulHighU32:
      return Mulhu32Latency();
    case kRiscvMulHigh64:
      return Mulh64Latency();
    // Division results feed a conditional move (MovzLatency), presumably
    // for the divide-by-zero path — TODO confirm against the codegen.
    case kRiscvDiv32: {
      int latency = Div32Latency(instr->InputAt(1)->IsRegister());
      return latency + MovzLatency();
    }
    case kRiscvDivU32: {
      int latency = Divu32Latency(instr->InputAt(1)->IsRegister());
      return latency + MovzLatency();
    }
    case kRiscvMod32:
      return Mod32Latency();
    case kRiscvModU32:
      return Modu32Latency();
    case kRiscvMul64:
      return Mul64Latency();
    case kRiscvDiv64: {
      int latency = Div64Latency();
      return latency + MovzLatency();
    }
    case kRiscvDivU64: {
      int latency = Divu64Latency();
      return latency + MovzLatency();
    }
    case kRiscvMod64:
      return Mod64Latency();
    case kRiscvModU64:
      return Modu64Latency();
    // Bitwise logic. The 32-bit variants add 1-2 cycles on top of the
    // 64-bit op — presumably for operand/result extension; TODO confirm.
    case kRiscvAnd:
      return AndLatency(instr->InputAt(1)->IsRegister());
    case kRiscvAnd32: {
      bool is_operand_register = instr->InputAt(1)->IsRegister();
      int latency = AndLatency(is_operand_register);
      if (is_operand_register) {
        return latency + 2;
      } else {
        return latency + 1;
      }
    }
    case kRiscvOr:
      return OrLatency(instr->InputAt(1)->IsRegister());
    case kRiscvOr32: {
      bool is_operand_register = instr->InputAt(1)->IsRegister();
      int latency = OrLatency(is_operand_register);
      if (is_operand_register) {
        return latency + 2;
      } else {
        return latency + 1;
      }
    }
    case kRiscvNor:
      return NorLatency(instr->InputAt(1)->IsRegister());
    case kRiscvNor32: {
      bool is_operand_register = instr->InputAt(1)->IsRegister();
      int latency = NorLatency(is_operand_register);
      if (is_operand_register) {
        return latency + 2;
      } else {
        return latency + 1;
      }
    }
    case kRiscvXor:
      return XorLatency(instr->InputAt(1)->IsRegister());
    case kRiscvXor32: {
      bool is_operand_register = instr->InputAt(1)->IsRegister();
      int latency = XorLatency(is_operand_register);
      if (is_operand_register) {
        return latency + 2;
      } else {
        return latency + 1;
      }
    }
    // Bit counting.
    case kRiscvClz32:
    case kRiscvClz64:
      return Clz64Latency();
    case kRiscvCtz32:
      return Ctz32Latency();
    case kRiscvCtz64:
      return Ctz64Latency();
    case kRiscvPopcnt32:
      return Popcnt32Latency();
    case kRiscvPopcnt64:
      return Popcnt64Latency();
    // Shifts and rotates.
    case kRiscvShl32:
      return 1;
    case kRiscvShr32:
    case kRiscvSar32:
    case kRiscvZeroExtendWord:
      return 2;
    case kRiscvSignExtendWord:
    case kRiscvShl64:
    case kRiscvShr64:
    case kRiscvSar64:
    case kRiscvRor32:
    case kRiscvRor64:
      return 1;
    case kRiscvTst:
      return AndLatency(instr->InputAt(1)->IsRegister());
    case kRiscvMov:
      return 1;
    // Single-precision float ops; Latency::* are per-instruction constants.
    case kRiscvCmpS:
      return MoveLatency() + CompareF32Latency();
    case kRiscvAddS:
      return Latency::ADD_S;
    case kRiscvSubS:
      return Latency::SUB_S;
    case kRiscvMulS:
      return Latency::MUL_S;
    case kRiscvDivS:
      return Latency::DIV_S;
    case kRiscvModS:
      // fmodf is a C library call.
      return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() +
             CallCFunctionLatency() + MovFromFloatResultLatency();
    case kRiscvAbsS:
      return Latency::ABS_S;
    case kRiscvNegS:
      return NegdLatency();
    case kRiscvSqrtS:
      return Latency::SQRT_S;
    case kRiscvMaxS:
      return Latency::MAX_S;
    case kRiscvMinS:
      return Latency::MIN_S;
    // Double-precision float ops.
    case kRiscvCmpD:
      return MoveLatency() + CompareF64Latency();
    case kRiscvAddD:
      return Latency::ADD_D;
    case kRiscvSubD:
      return Latency::SUB_D;
    case kRiscvMulD:
      return Latency::MUL_D;
    case kRiscvDivD:
      return Latency::DIV_D;
    case kRiscvModD:
      // fmod is a C library call.
      return PrepareCallCFunctionLatency() + MovToFloatParametersLatency() +
             CallCFunctionLatency() + MovFromFloatResultLatency();
    case kRiscvAbsD:
      return Latency::ABS_D;
    case kRiscvNegD:
      return NegdLatency();
    case kRiscvSqrtD:
      return Latency::SQRT_D;
    case kRiscvMaxD:
      return Latency::MAX_D;
    case kRiscvMinD:
      return Latency::MIN_D;
    // Rounding.
    case kRiscvFloat64RoundDown:
    case kRiscvFloat64RoundTruncate:
    case kRiscvFloat64RoundUp:
    case kRiscvFloat64RoundTiesEven:
      return Float64RoundLatency();
    case kRiscvFloat32RoundDown:
    case kRiscvFloat32RoundTruncate:
    case kRiscvFloat32RoundUp:
    case kRiscvFloat32RoundTiesEven:
      return Float32RoundLatency();
    case kRiscvFloat32Max:
      return Float32MaxLatency();
    case kRiscvFloat64Max:
      return Float64MaxLatency();
    case kRiscvFloat32Min:
      return Float32MinLatency();
    case kRiscvFloat64Min:
      return Float64MinLatency();
    case kRiscvFloat64SilenceNaN:
      return Latency::SUB_D;
    // Conversions. MOVT_*/MOVF_* model moves between the integer and FP
    // register files; the unsigned variants include extra fix-up code.
    case kRiscvCvtSD:
      return Latency::CVT_S_D;
    case kRiscvCvtDS:
      return Latency::CVT_D_S;
    case kRiscvCvtDW:
      return Latency::MOVT_FREG + Latency::CVT_D_W;
    case kRiscvCvtSW:
      return Latency::MOVT_FREG + Latency::CVT_S_W;
    case kRiscvCvtSUw:
      return 1 + Latency::MOVT_DREG + Latency::CVT_S_L;
    case kRiscvCvtSL:
      return Latency::MOVT_DREG + Latency::CVT_S_L;
    case kRiscvCvtDL:
      return Latency::MOVT_DREG + Latency::CVT_D_L;
    case kRiscvCvtDUw:
      return 1 + Latency::MOVT_DREG + Latency::CVT_D_L;
    case kRiscvCvtDUl:
      return 2 * Latency::BRANCH + 3 + 2 * Latency::MOVT_DREG +
             2 * Latency::CVT_D_L + Latency::ADD_D;
    case kRiscvCvtSUl:
      return 2 * Latency::BRANCH + 3 + 2 * Latency::MOVT_DREG +
             2 * Latency::CVT_S_L + Latency::ADD_S;
    case kRiscvFloorWD:
      return Latency::FLOOR_W_D + Latency::MOVF_FREG;
    case kRiscvCeilWD:
      return Latency::CEIL_W_D + Latency::MOVF_FREG;
    case kRiscvRoundWD:
      return Latency::ROUND_W_D + Latency::MOVF_FREG;
    case kRiscvTruncWD:
      return Latency::TRUNC_W_D + Latency::MOVF_FREG;
    case kRiscvFloorWS:
      return Latency::FLOOR_W_S + Latency::MOVF_FREG;
    case kRiscvCeilWS:
      return Latency::CEIL_W_S + Latency::MOVF_FREG;
    case kRiscvRoundWS:
      return Latency::ROUND_W_S + Latency::MOVF_FREG;
    case kRiscvTruncWS:
      return Latency::TRUNC_W_S + Latency::MOVF_FREG + 2 + MovnLatency();
    // Truncations that may produce a second (success-flag) output.
    case kRiscvTruncLS:
      return TruncLSLatency(instr->OutputCount() > 1);
    case kRiscvTruncLD:
      return TruncLDLatency(instr->OutputCount() > 1);
    case kRiscvTruncUwD:
      // Estimated max.
      return CompareF64Latency() + 2 * Latency::BRANCH +
             2 * Latency::TRUNC_W_D + Latency::SUB_D + OrLatency() +
             Latency::MOVT_FREG + Latency::MOVF_FREG + Latency::MOVT_HIGH_FREG +
             1;
    case kRiscvTruncUwS:
      // Estimated max.
      return CompareF32Latency() + 2 * Latency::BRANCH +
             2 * Latency::TRUNC_W_S + Latency::SUB_S + OrLatency() +
             Latency::MOVT_FREG + 2 * Latency::MOVF_FREG + 2 + MovzLatency();
    case kRiscvTruncUlS:
      return TruncUlSLatency();
    case kRiscvTruncUlD:
      return TruncUlDLatency();
    // Bitcasts / word extraction between register files.
    case kRiscvBitcastDL:
      return Latency::MOVF_HIGH_DREG;
    case kRiscvBitcastLD:
      return Latency::MOVT_DREG;
    case kRiscvFloat64ExtractLowWord32:
      return Latency::MOVF_FREG;
    case kRiscvFloat64InsertLowWord32:
      return Latency::MOVF_HIGH_FREG + Latency::MOVT_FREG +
             Latency::MOVT_HIGH_FREG;
    case kRiscvFloat64ExtractHighWord32:
      return Latency::MOVF_HIGH_FREG;
    case kRiscvFloat64InsertHighWord32:
      return Latency::MOVT_HIGH_FREG;
    case kRiscvSignExtendByte:
    case kRiscvSignExtendShort:
      return 1;
    // Aligned loads and stores.
    case kRiscvLbu:
    case kRiscvLb:
    case kRiscvLhu:
    case kRiscvLh:
    case kRiscvLwu:
    case kRiscvLw:
    case kRiscvLd:
    case kRiscvSb:
    case kRiscvSh:
    case kRiscvSw:
    case kRiscvSd:
      return AlignedMemoryLatency();
    case kRiscvLoadFloat:
      return ULoadFloatLatency();
    case kRiscvLoadDouble:
      return LoadDoubleLatency();
    case kRiscvStoreFloat:
      return StoreFloatLatency();
    case kRiscvStoreDouble:
      return StoreDoubleLatency();
    // Unaligned (U-prefixed) loads and stores.
    case kRiscvUlhu:
    case kRiscvUlh:
      return UlhuLatency();
    case kRiscvUlwu:
      return UlwuLatency();
    case kRiscvUlw:
      return UlwLatency();
    case kRiscvUld:
      return UldLatency();
    case kRiscvULoadFloat:
      return ULoadFloatLatency();
    case kRiscvULoadDouble:
      return ULoadDoubleLatency();
    case kRiscvUsh:
      return UshLatency();
    case kRiscvUsw:
      return UswLatency();
    case kRiscvUsd:
      return UsdLatency();
    case kRiscvUStoreFloat:
      return UStoreFloatLatency();
    case kRiscvUStoreDouble:
      return UStoreDoubleLatency();
    // Stack manipulation.
    case kRiscvPush: {
      int latency = 0;
      if (instr->InputAt(0)->IsFPRegister()) {
        // FP pushes store the value and adjust sp separately.
        latency = StoreDoubleLatency() + Sub64Latency(false);
      } else {
        latency = PushLatency();
      }
      return latency;
    }
    case kRiscvPeek: {
      int latency = 0;
      if (instr->OutputAt(0)->IsFPRegister()) {
        auto op = LocationOperand::cast(instr->OutputAt(0));
        switch (op->representation()) {
          case MachineRepresentation::kFloat64:
            latency = LoadDoubleLatency();
            break;
          case MachineRepresentation::kFloat32:
            latency = Latency::LOAD_FLOAT;
            break;
          default:
            UNREACHABLE();
        }
      } else {
        latency = AlignedMemoryLatency();
      }
      return latency;
    }
    case kRiscvStackClaim:
      return Sub64Latency(false);
    case kRiscvStoreToStackSlot: {
      int latency = 0;
      if (instr->InputAt(0)->IsFPRegister()) {
        if (instr->InputAt(0)->IsSimd128Register()) {
          latency = 1;  // Estimated value.
        } else {
          latency = StoreDoubleLatency();
        }
      } else {
        latency = AlignedMemoryLatency();
      }
      return latency;
    }
    case kRiscvByteSwap64:
      return ByteSwapSignedLatency();
    case kRiscvByteSwap32:
      return ByteSwapSignedLatency();
    // Atomics. Sub-word exchanges/compare-exchanges use the helpers above;
    // word-sized ones are costed inline as an LL/SC loop.
    case kAtomicLoadInt8:
    case kAtomicLoadUint8:
    case kAtomicLoadInt16:
    case kAtomicLoadUint16:
    case kAtomicLoadWord32:
      return 2;
    case kAtomicStoreWord8:
    case kAtomicStoreWord16:
    case kAtomicStoreWord32:
      return 3;
    case kAtomicExchangeInt8:
      return Word32AtomicExchangeLatency(true, 8);
    case kAtomicExchangeUint8:
      return Word32AtomicExchangeLatency(false, 8);
    case kAtomicExchangeInt16:
      return Word32AtomicExchangeLatency(true, 16);
    case kAtomicExchangeUint16:
      return Word32AtomicExchangeLatency(false, 16);
    case kAtomicExchangeWord32:
      return 2 + LlLatency(0) + 1 + ScLatency(0) + BranchShortLatency() + 1;
    case kAtomicCompareExchangeInt8:
      return Word32AtomicCompareExchangeLatency(true, 8);
    case kAtomicCompareExchangeUint8:
      return Word32AtomicCompareExchangeLatency(false, 8);
    case kAtomicCompareExchangeInt16:
      return Word32AtomicCompareExchangeLatency(true, 16);
    case kAtomicCompareExchangeUint16:
      return Word32AtomicCompareExchangeLatency(false, 16);
    case kAtomicCompareExchangeWord32:
      return 3 + LlLatency(0) + BranchShortLatency() + 1 + ScLatency(0) +
             BranchShortLatency() + 1;
    case kRiscvAssertEqual:
      return AssertLatency();
    default:
      // Any opcode without an explicit estimate is assumed single-cycle.
      return 1;
  }
}
1558
1559 } // namespace compiler
1560 } // namespace internal
1561 } // namespace v8
1562