/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "scheduler_arm.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_arm_vixl.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

namespace art HIDDEN {
namespace arm {

using helpers::Int32ConstantFrom;
using helpers::Uint64ConstantFrom;

// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and on
// automatic tuning using a differential-evolution approach on various benchmarks.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;

class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
 public:
  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
      : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}

  // Default visitor for instructions not handled specifically below.
  void VisitInstruction([[maybe_unused]] HInstruction*) override {
    last_visited_latency_ = kArmIntegerOpLatency;
  }

  // We add a second unused parameter to be able to use this macro like the others
  // defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
  M(ArrayGet, unused)                         \
  M(ArrayLength, unused)                      \
  M(ArraySet, unused)                         \
  M(Add, unused)                              \
  M(Sub, unused)                              \
  M(And, unused)                              \
  M(Or, unused)                               \
  M(Ror, unused)                              \
  M(Xor, unused)                              \
  M(Shl, unused)                              \
  M(Shr, unused)                              \
  M(UShr, unused)                             \
  M(Mul, unused)                              \
  M(Div, unused)                              \
  M(Condition, unused)                        \
  M(Compare, unused)                          \
  M(BoundsCheck, unused)                      \
  M(InstanceFieldGet, unused)                 \
  M(InstanceFieldSet, unused)                 \
  M(InstanceOf, unused)                       \
  M(Invoke, unused)                           \
  M(LoadString, unused)                       \
  M(NewArray, unused)                         \
  M(NewInstance, unused)                      \
  M(Rem, unused)                              \
  M(StaticFieldGet, unused)                   \
  M(StaticFieldSet, unused)                   \
  M(SuspendCheck, unused)                     \
  M(TypeConversion, unused)

#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) override;

  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  bool CanGenerateTest(HCondition* cond);
  void HandleGenerateConditionWithZero(IfCondition cond);
  void HandleGenerateLongTestConstant(HCondition* cond);
  void HandleGenerateLongTest(HCondition* cond);
  void HandleGenerateLongComparesAndJumps();
  void HandleGenerateTest(HCondition* cond);
  void HandleGenerateConditionGeneric(HCondition* cond);
  void HandleGenerateEqualLong(HCondition* cond);
  void HandleGenerateConditionLong(HCondition* cond);
  void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
  void HandleCondition(HCondition* instr);
  void HandleBinaryOperationLantencies(HBinaryOperation* instr);
  void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
  void HandleShiftLatencies(HBinaryOperation* instr);
  void HandleDivRemConstantIntegralLatencies(int32_t imm);
  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleGenerateDataProcInstruction(bool internal_latency = false);
  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);

  // The latency setting for each HInstruction depends on how the CodeGenerator may generate
  // code; latency visitors may query the CodeGenerator for such information in order to set
  // latencies accurately.
  CodeGeneratorARMVIXL* codegen_;
};
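
// A note on how these visitors feed the scheduler (a sketch based on the conventions in
// scheduler.h, not something defined in this file): `last_visited_latency_` is the latency
// charged to users of the instruction's result, while `last_visited_internal_latency_` accounts
// for extra instructions that codegen is expected to emit before the result-producing one.
// For example, a visitor assuming codegen lowers a 64-bit add to an ADDS+ADC pair would roughly
// write (ignoring the extra carry-dependency bubble that the actual visitor below adds):
//
//   last_visited_internal_latency_ = kArmIntegerOpLatency;  // ADDS, feeding the carry flag.
//   last_visited_latency_ = kArmIntegerOpLatency;           // ADC, producing the visible result.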

void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt64:
      // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
      // so a bubble (kArmNopLatency) is added to represent the internal carry flag
      // dependency inside these pairs.
      last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmFloatingPointOpLatency;
      break;
    default:
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
  HandleBinaryOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
  HandleBinaryOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt64:
      last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmMulFloatingPointLatency;
      break;
    default:
      last_visited_latency_ = kArmMulIntegerLatency;
      break;
  }
}
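
// For reference, a rough sketch of the 32x32->64 decomposition that the
// `3 * kArmMulIntegerLatency` above is meant to approximate (register names are illustrative;
// the exact sequence is decided by the code generator):
//
//   mul   ip, in2_lo, in1_hi              // high/low cross product
//   mla   ip, in2_hi, in1_lo, ip          // plus low/high cross product
//   umull out_lo, out_hi, in1_lo, in2_lo  // low halves, full 64-bit product
//   add   out_hi, out_hi, ip              // final integer op, charged as the visible latency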

void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt64:
      last_visited_internal_latency_ = kArmIntegerOpLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmFloatingPointOpLatency;
      break;
    default:
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
  HandleBitwiseOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
  HandleBitwiseOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
  HandleBitwiseOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt32:
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64: {
      // HandleLongRotate
      HInstruction* rhs = instr->GetRight();
      if (rhs->IsConstant()) {
        uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
        if (rot != 0u) {
          last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
        } else {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
        }
      } else {
        last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
        last_visited_latency_ = kArmBranchLatency;
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
  DataType::Type type = instr->GetResultType();
  HInstruction* rhs = instr->GetRight();
  switch (type) {
    case DataType::Type::kInt32:
      if (!rhs->IsConstant()) {
        last_visited_internal_latency_ = kArmIntegerOpLatency;
      }
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64:
      if (!rhs->IsConstant()) {
        last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
      } else {
        uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
        if (shift_value == 1 || shift_value >= 32) {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
        } else {
          last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
        }
      }
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    default:
      LOG(FATAL) << "Unexpected operation type " << type;
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
  HandleShiftLatencies(instr);
}

void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
  HandleShiftLatencies(instr);
}

void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
  HandleShiftLatencies(instr);
}

void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
  switch (condition) {
    case kCondEQ:
    case kCondBE:
    case kCondNE:
    case kCondA:
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondGE:
      // Mvn
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      FALLTHROUGH_INTENDED;
    case kCondLT:
      // Lsr
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondAE:
      // Trivially true.
      // Mov
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondB:
      // Trivially false.
      // Mov
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    default:
      LOG(FATAL) << "Unexpected condition " << condition;
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
  DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition cond = condition->GetCondition();

  HInstruction* right = condition->InputAt(1);

  int64_t value = Uint64ConstantFrom(right);

  // Comparisons against 0 are common enough, so codegen has special handling for them.
  if (value == 0) {
    switch (cond) {
      case kCondNE:
      case kCondA:
      case kCondEQ:
      case kCondBE:
        // Orrs
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      case kCondLT:
      case kCondGE:
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      case kCondB:
      case kCondAE:
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      default:
        break;
    }
  }

  switch (cond) {
    case kCondEQ:
    case kCondNE:
    case kCondB:
    case kCondBE:
    case kCondA:
    case kCondAE: {
      // Cmp, IT, Cmp
      last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
      break;
    }
    case kCondLE:
    case kCondGT:
      // Trivially true or false.
      if (value == std::numeric_limits<int64_t>::max()) {
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        break;
      }
      FALLTHROUGH_INTENDED;
    case kCondGE:
    case kCondLT: {
      // Cmp, Sbcs
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      break;
    }
    default:
      LOG(FATAL) << "Unreachable";
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
  DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition cond = condition->GetCondition();

  switch (cond) {
    case kCondEQ:
    case kCondNE:
    case kCondB:
    case kCondBE:
    case kCondA:
    case kCondAE: {
      // Cmp, IT, Cmp
      last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
      break;
    }
    case kCondLE:
    case kCondGT:
    case kCondGE:
    case kCondLT: {
      // Cmp, Sbcs
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      break;
    }
    default:
      LOG(FATAL) << "Unreachable";
      UNREACHABLE();
  }
}
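
// The "Cmp, IT, Cmp" and "Cmp, Sbcs" annotations above refer to the two standard AArch32 idioms
// for comparing 64-bit values held in register pairs (a sketch; the exact registers and condition
// codes are chosen by the code generator):
//
//   // Equality and unsigned orderings:
//   cmp   left_hi, right_hi
//   it    eq
//   cmpeq left_lo, right_lo
//
//   // Signed less-than / greater-or-equal:
//   cmp   left_lo, right_lo
//   sbcs  ip, left_hi, right_hi   // ip is discarded; only the flags matter.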

// The GenerateTest series of functions are all counted as internal latency.
void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
  const DataType::Type type = condition->GetLeft()->GetType();

  if (type == DataType::Type::kInt64) {
    condition->InputAt(1)->IsConstant()
        ? HandleGenerateLongTestConstant(condition)
        : HandleGenerateLongTest(condition);
  } else if (DataType::IsFloatingPointType(type)) {
    // GenerateVcmp + Vmrs
    last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
  } else {
    // Cmp
    last_visited_internal_latency_ += kArmIntegerOpLatency;
  }
}

bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
  if (condition->GetLeft()->GetType() == DataType::Type::kInt64) {
    HInstruction* right = condition->InputAt(1);

    if (right->IsConstant()) {
      IfCondition c = condition->GetCondition();
      const uint64_t value = Uint64ConstantFrom(right);

      if (c < kCondLT || c > kCondGE) {
        if (value != 0) {
          return false;
        }
      } else if (c == kCondLE || c == kCondGT) {
        if (value < std::numeric_limits<int64_t>::max() &&
            !codegen_->GetAssembler()->ShifterOperandCanHold(
                SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) {
          return false;
        }
      } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(
                     SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) {
        return false;
      }
    }
  }

  return true;
}

void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
  HandleGenerateTest(cond);

  // Unlike the codegen pass, we cannot check whether the 'out' register IsLow() here, because
  // scheduling runs before liveness analysis (the location builder) and the register allocator,
  // so we can only follow one codegen path, assuming out.IsLow() is true.
  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}

void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
  DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition condition = cond->GetCondition();

  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;

  if (condition == kCondNE) {
    // Orrs, IT, Mov
    last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
  } else {
    last_visited_internal_latency_ += kArmIntegerOpLatency;
    HandleGenerateConditionWithZero(condition);
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
  last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
  last_visited_internal_latency_ += kArmBranchLatency;
}

void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
  DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition condition = cond->GetCondition();
  HInstruction* right = cond->InputAt(1);

  if (right->IsConstant()) {
    // Comparisons against 0 are common enough, so codegen has special handling for them.
    if (Uint64ConstantFrom(right) == 0) {
      switch (condition) {
        case kCondNE:
        case kCondA:
        case kCondEQ:
        case kCondBE:
          // Orr
          last_visited_internal_latency_ += kArmIntegerOpLatency;
          HandleGenerateConditionWithZero(condition);
          return;
        case kCondLT:
        case kCondGE:
          FALLTHROUGH_INTENDED;
        case kCondAE:
        case kCondB:
          HandleGenerateConditionWithZero(condition);
          return;
        case kCondLE:
        case kCondGT:
        default:
          break;
      }
    }
  }

  if ((condition == kCondEQ || condition == kCondNE) &&
      !CanGenerateTest(cond)) {
    HandleGenerateEqualLong(cond);
    return;
  }

  if (CanGenerateTest(cond)) {
    HandleGenerateConditionGeneric(cond);
    return;
  }

  HandleGenerateLongComparesAndJumps();

  last_visited_internal_latency_ += kArmIntegerOpLatency;
  last_visited_latency_ = kArmBranchLatency;
}

void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
  const DataType::Type type = cond->GetLeft()->GetType();

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  if (type == DataType::Type::kInt64) {
    HandleGenerateConditionLong(cond);
    return;
  }

  IfCondition condition = cond->GetCondition();
  HInstruction* right = cond->InputAt(1);
  int64_t value;

  if (right->IsConstant()) {
    value = Uint64ConstantFrom(right);

    // Comparisons against 0 are common enough, so codegen has special handling for them.
    if (value == 0) {
      switch (condition) {
        case kCondNE:
        case kCondA:
        case kCondEQ:
        case kCondBE:
        case kCondLT:
        case kCondGE:
        case kCondAE:
        case kCondB:
          HandleGenerateConditionWithZero(condition);
          return;
        case kCondLE:
        case kCondGT:
        default:
          break;
      }
    }
  }

  if (condition == kCondEQ || condition == kCondNE) {
    if (condition == kCondNE) {
      // CMP, IT, MOV.ne
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
    } else {
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      HandleGenerateConditionWithZero(condition);
    }
    return;
  }

  HandleGenerateConditionGeneric(cond);
}

void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
  if (cond->IsEmittedAtUseSite()) {
    last_visited_latency_ = 0;
    return;
  }

  const DataType::Type type = cond->GetLeft()->GetType();

  if (DataType::IsFloatingPointType(type)) {
    HandleGenerateConditionGeneric(cond);
    return;
  }

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  const IfCondition condition = cond->GetCondition();

  if (type == DataType::Type::kBool &&
      cond->GetRight()->GetType() == DataType::Type::kBool &&
      (condition == kCondEQ || condition == kCondNE)) {
    if (condition == kCondEQ) {
      last_visited_internal_latency_ = kArmIntegerOpLatency;
    }
    last_visited_latency_ = kArmIntegerOpLatency;
    return;
  }

  HandleGenerateConditionIntegralOrNonPrimitive(cond);
}

void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
  HandleCondition(instr);
}

void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
  DataType::Type type = instr->InputAt(0)->GetType();
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64:
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
      break;
    default:
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
      break;
  }
  last_visited_latency_ = kArmIntegerOpLatency;
}

void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
  if (instruction->GetResultType() == DataType::Type::kInt32) {
    last_visited_latency_ = kArmIntegerOpLatency;
  } else {
    last_visited_internal_latency_ = kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
  if (internal_latency) {
    last_visited_internal_latency_ += kArmIntegerOpLatency;
  } else {
    last_visited_latency_ = kArmDataProcWithShifterOpLatency;
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
  if (kind == HInstruction::kAdd) {
    last_visited_internal_latency_ = kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  } else if (kind == HInstruction::kSub) {
    last_visited_internal_latency_ = kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  } else {
    HandleGenerateDataProcInstruction(/* internal_latency= */ true);
    HandleGenerateDataProcInstruction();
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
  DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));

  const uint32_t shift_value = instruction->GetShiftAmount();
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();

  if (shift_value >= 32) {
    // Different shift types actually generate similar code here, so there is no need to
    // differentiate between shift types the way the codegen pass does; this also avoids
    // handling shift types from different ARM backends.
    HandleGenerateDataProc(instruction);
  } else {
    DCHECK_GT(shift_value, 1U);
    DCHECK_LT(shift_value, 32U);

    if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
      HandleGenerateDataProcInstruction(/* internal_latency= */ true);
      HandleGenerateDataProcInstruction(/* internal_latency= */ true);
      HandleGenerateDataProcInstruction();
    } else {
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      HandleGenerateDataProc(instruction);
    }
  }
}

void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
  const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();

  if (instruction->GetType() == DataType::Type::kInt32) {
    HandleGenerateDataProcInstruction();
  } else {
    DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
      HandleGenerateDataProc(instruction);
    } else {
      HandleGenerateLongDataProc(instruction);
    }
  }
}

void SchedulingLatencyVisitorARM::VisitIntermediateAddress([[maybe_unused]] HIntermediateAddress*) {
  // Although the code generated is a simple `add` instruction, we found through empirical results
  // that spacing it from its use in memory accesses was beneficial.
  last_visited_internal_latency_ = kArmNopLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}

void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
    [[maybe_unused]] HIntermediateAddressIndex*) {
  UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
}

void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) {
  last_visited_latency_ = kArmMulIntegerLatency;
}

void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
  DataType::Type type = instruction->GetType();
  const bool maybe_compressed_char_at =
      mirror::kUseStringCompression && instruction->IsStringCharAt();
  HInstruction* array_instr = instruction->GetArray();
  bool has_intermediate_address = array_instr->IsIntermediateAddress();
  HInstruction* index = instruction->InputAt(1);

  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      if (maybe_compressed_char_at) {
        last_visited_internal_latency_ += kArmMemoryLoadLatency;
      }
      if (index->IsConstant()) {
        if (maybe_compressed_char_at) {
          last_visited_internal_latency_ +=
              kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
          last_visited_latency_ = kArmBranchLatency;
        } else {
          last_visited_latency_ += kArmMemoryLoadLatency;
        }
      } else {
        if (has_intermediate_address) {
        } else {
          last_visited_internal_latency_ += kArmIntegerOpLatency;
        }
        if (maybe_compressed_char_at) {
          last_visited_internal_latency_ +=
              kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
          last_visited_latency_ = kArmBranchLatency;
        } else {
          last_visited_latency_ += kArmMemoryLoadLatency;
        }
      }
      break;
    }

    case DataType::Type::kReference: {
      if (codegen_->EmitBakerReadBarrier()) {
        last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
      } else {
        if (index->IsConstant()) {
          last_visited_latency_ = kArmMemoryLoadLatency;
        } else {
          if (has_intermediate_address) {
          } else {
            last_visited_internal_latency_ += kArmIntegerOpLatency;
          }
          last_visited_latency_ = kArmMemoryLoadLatency;
        }
      }
      break;
    }

    case DataType::Type::kInt64: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }

    case DataType::Type::kFloat32: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }

    case DataType::Type::kFloat64: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }

    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}
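
// The compressed-string paths above model roughly the following String.charAt() lowering
// (a sketch; the actual sequence is emitted by the code generator): load the `count` field,
// shift out the compression flag, branch on it, and then either LDRB from a byte-per-char
// string or LDRH from a half-word-per-char string. That is where the extra load, integer op,
// and branch latencies come from when `maybe_compressed_char_at` is true.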

void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
  last_visited_latency_ = kArmMemoryLoadLatency;
  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
    last_visited_internal_latency_ = kArmMemoryLoadLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  }
}

void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
  HInstruction* index = instruction->InputAt(1);
  DataType::Type value_type = instruction->GetComponentType();
  HInstruction* array_instr = instruction->GetArray();
  bool has_intermediate_address = array_instr->IsIntermediateAddress();

  switch (value_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryStoreLatency;
      } else {
        if (has_intermediate_address) {
        } else {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
        }
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;
    }

    case DataType::Type::kReference: {
      if (instruction->InputAt(2)->IsNullConstant()) {
        if (index->IsConstant()) {
          last_visited_latency_ = kArmMemoryStoreLatency;
        } else {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmMemoryStoreLatency;
        }
      } else {
        // Modeling the exact instruction sequence of runtime type checks is too complicated;
        // just give the whole store a single slow latency.
        last_visited_latency_ = kArmRuntimeTypeCheckLatency;
      }
      break;
    }

    case DataType::Type::kInt64: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }

    case DataType::Type::kFloat32: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }

    case DataType::Type::kFloat64: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }

    default:
      LOG(FATAL) << "Unreachable type " << value_type;
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) {
  last_visited_internal_latency_ = kArmIntegerOpLatency;
  // Users do not use any data results.
  last_visited_latency_ = 0;
}

void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
  if (imm == 0) {
    last_visited_internal_latency_ = 0;
    last_visited_latency_ = 0;
  } else if (imm == 1 || imm == -1) {
    last_visited_latency_ = kArmIntegerOpLatency;
  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
    last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  } else {
    last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  }
}
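
// The buckets above mirror the usual strategies for integer division by a constant (a sketch of
// the expected lowering, not a definition from this file): dividing by +/-1 is a plain MOV/RSB;
// dividing by a power of two is roughly an ASR to extract the sign, an ADD of the bias, and a
// final ASR (three internal integer ops plus the result-producing one); any other constant goes
// through a magic-number SMULL followed by a couple of shift/add fix-ups, hence one multiply plus
// two integer ops of internal latency.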

void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
  DataType::Type type = instruction->GetResultType();
  switch (type) {
    case DataType::Type::kInt32: {
      HInstruction* rhs = instruction->GetRight();
      if (rhs->IsConstant()) {
        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
        HandleDivRemConstantIntegralLatencies(imm);
      } else {
        last_visited_latency_ = kArmDivIntegerLatency;
      }
      break;
    }
    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmDivFloatLatency;
      break;
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmDivDoubleLatency;
      break;
    default:
      last_visited_internal_latency_ = kArmCallInternalLatency;
      last_visited_latency_ = kArmCallLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}

void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}

void SchedulingLatencyVisitorARM::VisitInstanceOf([[maybe_unused]] HInstanceOf*) {
  last_visited_internal_latency_ = kArmCallInternalLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}

void SchedulingLatencyVisitorARM::VisitInvoke([[maybe_unused]] HInvoke*) {
  last_visited_internal_latency_ = kArmCallInternalLatency;
  last_visited_latency_ = kArmCallLatency;
}

void SchedulingLatencyVisitorARM::VisitLoadString([[maybe_unused]] HLoadString*) {
  last_visited_internal_latency_ = kArmLoadStringInternalLatency;
  last_visited_latency_ = kArmMemoryLoadLatency;
}

void SchedulingLatencyVisitorARM::VisitNewArray([[maybe_unused]] HNewArray*) {
  last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
  last_visited_latency_ = kArmCallLatency;
}

void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
  if (instruction->IsStringAlloc()) {
    last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
  } else {
    last_visited_internal_latency_ = kArmCallInternalLatency;
  }
  last_visited_latency_ = kArmCallLatency;
}

void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
  DataType::Type type = instruction->GetResultType();
  switch (type) {
    case DataType::Type::kInt32: {
      HInstruction* rhs = instruction->GetRight();
      if (rhs->IsConstant()) {
        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
        HandleDivRemConstantIntegralLatencies(imm);
      } else {
        last_visited_internal_latency_ = kArmDivIntegerLatency;
        last_visited_latency_ = kArmMulIntegerLatency;
      }
      break;
    }
    default:
      last_visited_internal_latency_ = kArmCallInternalLatency;
      last_visited_latency_ = kArmCallLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
                                                          const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
  DCHECK(codegen_ != nullptr);
  bool is_volatile = field_info.IsVolatile();
  DataType::Type field_type = field_info.GetFieldType();
  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();

  switch (field_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;

    case DataType::Type::kReference:
      if (codegen_->EmitBakerReadBarrier()) {
        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    case DataType::Type::kInt64:
      if (is_volatile && !atomic_ldrd_strd) {
        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;

    case DataType::Type::kFloat64:
      if (is_volatile && !atomic_ldrd_strd) {
        last_visited_internal_latency_ =
            kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;

    default:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;
  }

  if (is_volatile) {
    last_visited_internal_latency_ += kArmMemoryBarrierLatency;
  }
}
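
// A note on the volatile wide-access cases above and in HandleFieldSetLatencies below: on cores
// without single-copy-atomic LDRD/STRD, the code generator is expected to fall back to an
// exclusive-access sequence (roughly LDREXD for loads and an LDREXD/STREXD loop for stores),
// with doubles additionally moved between the core register pair and a VFP register via VMOV.
// The extra internal latencies are a coarse approximation of those sequences, not an exact
// instruction count.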

void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
                                                          const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
  DCHECK(codegen_ != nullptr);
  bool is_volatile = field_info.IsVolatile();
  DataType::Type field_type = field_info.GetFieldType();
  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();

  switch (field_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (is_volatile) {
        last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmMemoryBarrierLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    case DataType::Type::kInt32:
    case DataType::Type::kReference:
      if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
        last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
      }
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;

    case DataType::Type::kInt64:
      if (is_volatile && !atomic_ldrd_strd) {
        last_visited_internal_latency_ =
            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;

    case DataType::Type::kFloat64:
      if (is_volatile && !atomic_ldrd_strd) {
        last_visited_internal_latency_ = kArmIntegerOpLatency +
            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;

    default:
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}

void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}

void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
  HBasicBlock* block = instruction->GetBlock();
  DCHECK_IMPLIES(block->GetLoopInformation() == nullptr,
                 block->IsEntryBlock() && instruction->GetNext()->IsGoto());
  // Users do not use any data results.
  last_visited_latency_ = 0;
}

void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
  DataType::Type result_type = instr->GetResultType();
  DataType::Type input_type = instr->GetInputType();

  switch (result_type) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      last_visited_latency_ = kArmIntegerOpLatency;  // SBFX or UBFX
      break;

    case DataType::Type::kInt32:
      switch (input_type) {
        case DataType::Type::kInt64:
          last_visited_latency_ = kArmIntegerOpLatency;  // MOV
          break;
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
      }
      break;

    case DataType::Type::kInt64:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          // MOV and extension
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
          // invokes runtime
          last_visited_internal_latency_ = kArmCallInternalLatency;
          break;
        default:
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
      }
      break;

    case DataType::Type::kFloat32:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kInt64:
          // invokes runtime
          last_visited_internal_latency_ = kArmCallInternalLatency;
          break;
        case DataType::Type::kFloat64:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
      }
      break;

    case DataType::Type::kFloat64:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kInt64:
          last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kFloat32:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
      }
      break;

    default:
      last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
      break;
  }
}

bool HSchedulerARM::IsSchedulable(const HInstruction* instruction) const {
  switch (instruction->GetKind()) {
#define SCHEDULABLE_CASE(type, unused)           \
    case HInstruction::InstructionKind::k##type: \
      return true;
    FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(SCHEDULABLE_CASE)
    FOR_EACH_CONCRETE_INSTRUCTION_ARM(SCHEDULABLE_CASE)
#undef SCHEDULABLE_CASE

    default:
      return HScheduler::IsSchedulable(instruction);
  }
}

std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> HSchedulerARM::BuildSchedulingGraph(
    HBasicBlock* block,
    ScopedArenaAllocator* allocator,
    const HeapLocationCollector* heap_location_collector) {
  SchedulingLatencyVisitorARM latency_visitor(codegen_);
  return HScheduler::BuildSchedulingGraph(
      block, allocator, heap_location_collector, &latency_visitor);
}

}  // namespace arm
}  // namespace art