• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
19 
20 #include "base/macros.h"
21 #include "code_generator_arm_vixl.h"
22 #include "scheduler.h"
23 
24 namespace art HIDDEN {
25 namespace arm {
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using a differential evolution approach on various benchmarks.
//
// NOTE(review): the unit of these values is not stated in this file; they are
// presumably relative scheduling costs rather than exact cycle counts - confirm
// against the scheduler implementation before re-tuning.

// Integer and floating-point data processing.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;

// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;

// Control flow and calls.
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;

// Miscellaneous.
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
49 
50 class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
51  public:
SchedulingLatencyVisitorARM(CodeGenerator * codegen)52   explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
53       : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
54 
55   // Default visitor for instructions not handled specifically below.
VisitInstruction(HInstruction * ATTRIBUTE_UNUSED)56   void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
57     last_visited_latency_ = kArmIntegerOpLatency;
58   }
59 
60 // We add a second unused parameter to be able to use this macro like the others
61 // defined in `nodes.h`.
62 #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
63   M(ArrayGet, unused)                         \
64   M(ArrayLength, unused)                      \
65   M(ArraySet, unused)                         \
66   M(Add, unused)                              \
67   M(Sub, unused)                              \
68   M(And, unused)                              \
69   M(Or, unused)                               \
70   M(Ror, unused)                              \
71   M(Xor, unused)                              \
72   M(Shl, unused)                              \
73   M(Shr, unused)                              \
74   M(UShr, unused)                             \
75   M(Mul, unused)                              \
76   M(Div, unused)                              \
77   M(Condition, unused)                        \
78   M(Compare, unused)                          \
79   M(BoundsCheck, unused)                      \
80   M(PredicatedInstanceFieldGet, unused)       \
81   M(InstanceFieldGet, unused)                 \
82   M(InstanceFieldSet, unused)                 \
83   M(InstanceOf, unused)                       \
84   M(Invoke, unused)                           \
85   M(LoadString, unused)                       \
86   M(NewArray, unused)                         \
87   M(NewInstance, unused)                      \
88   M(Rem, unused)                              \
89   M(StaticFieldGet, unused)                   \
90   M(StaticFieldSet, unused)                   \
91   M(SuspendCheck, unused)                     \
92   M(TypeConversion, unused)
93 
94 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
95   M(BitwiseNegatedRight, unused)                 \
96   M(MultiplyAccumulate, unused)                  \
97   M(IntermediateAddress, unused)                 \
98   M(IntermediateAddressIndex, unused)            \
99   M(DataProcWithShifterOp, unused)
100 
101 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
102   void Visit##type(H##type* instruction) override;
103 
104   FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
105   FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
106   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
107 
108 #undef DECLARE_VISIT_INSTRUCTION
109 
110  private:
111   bool CanGenerateTest(HCondition* cond);
112   void HandleGenerateConditionWithZero(IfCondition cond);
113   void HandleGenerateLongTestConstant(HCondition* cond);
114   void HandleGenerateLongTest(HCondition* cond);
115   void HandleGenerateLongComparesAndJumps();
116   void HandleGenerateTest(HCondition* cond);
117   void HandleGenerateConditionGeneric(HCondition* cond);
118   void HandleGenerateEqualLong(HCondition* cond);
119   void HandleGenerateConditionLong(HCondition* cond);
120   void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
121   void HandleCondition(HCondition* instr);
122   void HandleBinaryOperationLantencies(HBinaryOperation* instr);
123   void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
124   void HandleShiftLatencies(HBinaryOperation* instr);
125   void HandleDivRemConstantIntegralLatencies(int32_t imm);
126   void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
127   void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
128   void HandleGenerateDataProcInstruction(bool internal_latency = false);
129   void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
130   void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
131 
132   // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
133   // latency visitors may query CodeGenerator for such information for accurate latency settings.
134   CodeGeneratorARMVIXL* codegen_;
135 };
136 
137 class HSchedulerARM : public HScheduler {
138  public:
HSchedulerARM(SchedulingNodeSelector * selector,SchedulingLatencyVisitorARM * arm_latency_visitor)139   HSchedulerARM(SchedulingNodeSelector* selector,
140                 SchedulingLatencyVisitorARM* arm_latency_visitor)
141       : HScheduler(arm_latency_visitor, selector) {}
~HSchedulerARM()142   ~HSchedulerARM() override {}
143 
IsSchedulable(const HInstruction * instruction)144   bool IsSchedulable(const HInstruction* instruction) const override {
145 #define CASE_INSTRUCTION_KIND(type, unused) case \
146   HInstruction::InstructionKind::k##type:
147     switch (instruction->GetKind()) {
148       FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
149         return true;
150       FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
151         return true;
152       default:
153         return HScheduler::IsSchedulable(instruction);
154     }
155 #undef CASE_INSTRUCTION_KIND
156   }
157 
158  private:
159   DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
160 };
161 
162 }  // namespace arm
163 }  // namespace art
164 
165 #endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
166