/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

#include "base/macros.h"
#include "scheduler.h"

namespace art HIDDEN {
namespace arm64 {

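// Approximate per-instruction latencies, used as relative scheduling costs by
// the latency visitor below; heuristic values rather than exact cycle counts
// for any particular core.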
static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;

class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor {
 public:
  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArm64IntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
  M(ArrayGet             , unused)                   \
  M(ArrayLength          , unused)                   \
  M(ArraySet             , unused)                   \
  M(BoundsCheck          , unused)                   \
  M(Div                  , unused)                   \
  M(InstanceFieldGet     , unused)                   \
  M(InstanceOf           , unused)                   \
  M(LoadString           , unused)                   \
  M(Mul                  , unused)                   \
  M(NewArray             , unused)                   \
  M(NewInstance          , unused)                   \
  M(Rem                  , unused)                   \
  M(StaticFieldGet       , unused)                   \
  M(SuspendCheck         , unused)                   \
  M(TypeConversion       , unused)                   \
  M(VecReplicateScalar   , unused)                   \
  M(VecExtractScalar     , unused)                   \
  M(VecReduce            , unused)                   \
  M(VecCnv               , unused)                   \
  M(VecNeg               , unused)                   \
  M(VecAbs               , unused)                   \
  M(VecNot               , unused)                   \
  M(VecAdd               , unused)                   \
  M(VecHalvingAdd        , unused)                   \
  M(VecSub               , unused)                   \
  M(VecMul               , unused)                   \
  M(VecDiv               , unused)                   \
  M(VecMin               , unused)                   \
  M(VecMax               , unused)                   \
  M(VecAnd               , unused)                   \
  M(VecAndNot            , unused)                   \
  M(VecOr                , unused)                   \
  M(VecXor               , unused)                   \
  M(VecShl               , unused)                   \
  M(VecShr               , unused)                   \
  M(VecUShr              , unused)                   \
  M(VecSetScalars        , unused)                   \
  M(VecMultiplyAccumulate, unused)                   \
  M(VecLoad              , unused)                   \
  M(VecStore             , unused)

#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M)   \
  M(BinaryOperation      , unused)                   \
  M(Invoke               , unused)

#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused)  \
  void Visit##type(H##type* instruction) override;
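// For example, `M(ArrayGet, unused)` expands through `DECLARE_VISIT_INSTRUCTION` to:
//   void VisitArrayGet(HArrayGet* instruction) override;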

  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
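  // Helpers used by the SIMD visit functions above to compute latencies.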
  void HandleSimpleArithmeticSIMD(HVecOperation* instr);
  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};

class HSchedulerARM64 : public HScheduler {
 public:
  explicit HSchedulerARM64(SchedulingNodeSelector* selector)
      : HScheduler(&arm64_latency_visitor_, selector) {}
  ~HSchedulerARM64() override {}

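  // Instructions in the shared, arm64-specific, and common lists above are
  // schedulable; anything else defers to the generic HScheduler.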
  bool IsSchedulable(const HInstruction* instruction) const override {
#define CASE_INSTRUCTION_KIND(type, unused) case \
  HInstruction::InstructionKind::k##type:
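// For example, `CASE_INSTRUCTION_KIND(Mul, unused)` expands to:
//   case HInstruction::InstructionKind::kMul: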
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }

  // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
  // loop boundaries. This is a workaround for the compiler's lack of a notion of SIMD registers:
  // around a call we have to save/restore all live SIMD&FP registers (only the lower 64 bits of
  // SIMD&FP registers are callee-saved), so such vector instructions must not be reordered.
  //
  // TODO: remove this when proper support for SIMD registers is introduced in the compiler.
  bool IsSchedulingBarrier(const HInstruction* instr) const override {
    return HScheduler::IsSchedulingBarrier(instr) ||
           instr->IsVecReduce() ||
           instr->IsVecExtractScalar() ||
           instr->IsVecSetScalars() ||
           instr->IsVecReplicateScalar();
  }

 private:
  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
};
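
// A minimal usage sketch, assuming the `CriticalPathSchedulingNodeSelector`
// class and the `HScheduler::Schedule(HGraph*)` entry point declared in
// `scheduler.h`:
//
//   CriticalPathSchedulingNodeSelector selector;
//   HSchedulerARM64 scheduler(&selector);
//   scheduler.Schedule(graph);  // `graph` is the HGraph under compilation.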

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_