1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* This file contains codegen for the Thumb2 ISA. */
18 
19 #include "arm_lir.h"
20 #include "codegen_arm.h"
21 #include "dex/quick/mir_to_lir-inl.h"
22 #include "dex/reg_storage_eq.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "mirror/array.h"
25 
26 namespace art {
27 
28 LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
29   OpRegReg(kOpCmp, src1, src2);
30   return OpCondBranch(cond, target);
31 }
32 
33 /*
34  * Generate a Thumb2 IT instruction, which can nullify up to
35  * four subsequent instructions based on a condition and its
36  * inverse.  The condition applies to the first instruction, which
37  * is executed if the condition is met.  The string "guide" consists
38  * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
39  * A "T" means the instruction is executed if the condition is
40  * met, and an "E" means the instruction is executed if the condition
41  * is not met.
42  */
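// Worked example of the encoding below: for OpIT(kCondEq, "E") (an ITE EQ block),
// ArmConditionEncoding(kCondEq) is 0b0000, so cond_bit == 0 and alt_bit == 1. The single
// 'E' in the guide sets mask3 = alt_bit = 1, and the terminator bit is 1 << (3 - 1),
// giving mask == 0b1100 as the kThumb2It instruction's second operand.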
43 LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) {
44   int mask;
45   int mask3 = 0;
46   int mask2 = 0;
47   int mask1 = 0;
48   ArmConditionCode code = ArmConditionEncoding(ccode);
49   int cond_bit = code & 1;
50   int alt_bit = cond_bit ^ 1;
51 
52   // Note: case fallthroughs intentional
53   switch (strlen(guide)) {
54     case 3:
55       mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
56     case 2:
57       mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
58     case 1:
59       mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
60       break;
61     case 0:
62       break;
63     default:
64       LOG(FATAL) << "OAT: bad case in OpIT";
65   }
66   mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
67        (1 << (3 - strlen(guide)));
68   return NewLIR2(kThumb2It, code, mask);
69 }
70 
71 void ArmMir2Lir::UpdateIT(LIR* it, const char* new_guide) {
72   int mask;
73   int mask3 = 0;
74   int mask2 = 0;
75   int mask1 = 0;
76   ArmConditionCode code = static_cast<ArmConditionCode>(it->operands[0]);
77   int cond_bit = code & 1;
78   int alt_bit = cond_bit ^ 1;
79 
80   // Note: case fallthroughs intentional
81   switch (strlen(new_guide)) {
82     case 3:
83       mask1 = (new_guide[2] == 'T') ? cond_bit : alt_bit;
84     case 2:
85       mask2 = (new_guide[1] == 'T') ? cond_bit : alt_bit;
86     case 1:
87       mask3 = (new_guide[0] == 'T') ? cond_bit : alt_bit;
88       break;
89     case 0:
90       break;
91     default:
92       LOG(FATAL) << "OAT: bad case in UpdateIT";
93   }
94   mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
95       (1 << (3 - strlen(new_guide)));
96   it->operands[1] = mask;
97 }
98 
99 void ArmMir2Lir::OpEndIT(LIR* it) {
100   // TODO: use the 'it' pointer to do some checks with the LIR, for example
101   //       we could check that the number of instructions matches the mask
102   //       in the IT instruction.
103   CHECK(it != nullptr);
104   GenBarrier();
105 }
106 
107 /*
108  * 64-bit 3way compare function.
109  *     mov   rX, #-1
110  *     cmp   op1hi, op2hi
111  *     blt   done
112  *     bgt   flip
113  *     sub   rX, op1lo, op2lo (treat as unsigned)
114  *     beq   done
115  *     ite   hi
116  *     mov(hi)   rX, #-1
117  *     mov(!hi)  rX, #1
118  * flip:
119  *     neg   rX
120  * done:
121  */
122 void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
123   LIR* target1;
124   LIR* target2;
125   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
126   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
127   RegStorage t_reg = AllocTemp();
128   LoadConstant(t_reg, -1);
129   OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
130   LIR* branch1 = OpCondBranch(kCondLt, NULL);
131   LIR* branch2 = OpCondBranch(kCondGt, NULL);
132   OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
133   LIR* branch3 = OpCondBranch(kCondEq, NULL);
134 
135   LIR* it = OpIT(kCondHi, "E");
136   NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
137   LoadConstant(t_reg, 1);
138   OpEndIT(it);
139 
140   target2 = NewLIR0(kPseudoTargetLabel);
141   OpRegReg(kOpNeg, t_reg, t_reg);
142 
143   target1 = NewLIR0(kPseudoTargetLabel);
144 
145   RegLocation rl_temp = LocCReturn();  // Just using as template, will change
146   rl_temp.reg.SetReg(t_reg.GetReg());
147   StoreValue(rl_dest, rl_temp);
148   FreeTemp(t_reg);
149 
150   branch1->target = target1;
151   branch2->target = target2;
152   branch3->target = branch1->target;
153 }
154 
155 void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
156                                           int64_t val, ConditionCode ccode) {
157   int32_t val_lo = Low32Bits(val);
158   int32_t val_hi = High32Bits(val);
159   DCHECK_GE(ModifiedImmediate(val_lo), 0);
160   DCHECK_GE(ModifiedImmediate(val_hi), 0);
161   LIR* taken = &block_label_list_[bb->taken];
162   LIR* not_taken = &block_label_list_[bb->fall_through];
163   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
164   RegStorage low_reg = rl_src1.reg.GetLow();
165   RegStorage high_reg = rl_src1.reg.GetHigh();
166 
167   if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
168     RegStorage t_reg = AllocTemp();
169     NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
170     FreeTemp(t_reg);
171     OpCondBranch(ccode, taken);
172     return;
173   }
174 
175   switch (ccode) {
176     case kCondEq:
177     case kCondNe:
178       OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
179       break;
180     case kCondLt:
181       OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
182       OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
183       ccode = kCondUlt;
184       break;
185     case kCondLe:
186       OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
187       OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
188       ccode = kCondLs;
189       break;
190     case kCondGt:
191       OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
192       OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
193       ccode = kCondHi;
194       break;
195     case kCondGe:
196       OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
197       OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
198       ccode = kCondUge;
199       break;
200     default:
201       LOG(FATAL) << "Unexpected ccode: " << ccode;
202   }
203   OpCmpImmBranch(ccode, low_reg, val_lo, taken);
204 }
205 
206 void ArmMir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
207                                   int32_t true_val, int32_t false_val, RegStorage rs_dest,
208                                   int dest_reg_class) {
209   // TODO: Generalize the IT below to accept more than one-instruction loads.
210   DCHECK(InexpensiveConstantInt(true_val));
211   DCHECK(InexpensiveConstantInt(false_val));
212 
213   if ((true_val == 0 && code == kCondEq) ||
214       (false_val == 0 && code == kCondNe)) {
215     OpRegRegReg(kOpSub, rs_dest, left_op, right_op);
216     DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
217     LIR* it = OpIT(kCondNe, "");
218     LoadConstant(rs_dest, code == kCondEq ? false_val : true_val);
219     OpEndIT(it);
220     return;
221   }
222 
223   OpRegReg(kOpCmp, left_op, right_op);  // Same?
224   LIR* it = OpIT(code, "E");   // if-convert the test
225   LoadConstant(rs_dest, true_val);      // .eq case - load true
226   LoadConstant(rs_dest, false_val);     // .ne case - load false
227   OpEndIT(it);
228 }
229 
230 void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
231   RegLocation rl_result;
232   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
233   RegLocation rl_dest = mir_graph_->GetDest(mir);
234   // Avoid using float regs here.
235   RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
236   RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
237   rl_src = LoadValue(rl_src, src_reg_class);
238   ConditionCode ccode = mir->meta.ccode;
239   if (mir->ssa_rep->num_uses == 1) {
240     // CONST case
241     int true_val = mir->dalvikInsn.vB;
242     int false_val = mir->dalvikInsn.vC;
243     rl_result = EvalLoc(rl_dest, result_reg_class, true);
244     // Change kCondNe to kCondEq for the special cases below.
245     if (ccode == kCondNe) {
246       ccode = kCondEq;
247       std::swap(true_val, false_val);
248     }
249     bool cheap_false_val = InexpensiveConstantInt(false_val);
250     if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
251       OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
252       DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
253       LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, "");
254       LoadConstant(rl_result.reg, false_val);
255       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
256     } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
257       OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
258       DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
259       LIR* it = OpIT(kCondLs, "");
260       LoadConstant(rl_result.reg, false_val);
261       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
262     } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
263       OpRegImm(kOpCmp, rl_src.reg, 0);
264       LIR* it = OpIT(ccode, "E");
265       LoadConstant(rl_result.reg, true_val);
266       LoadConstant(rl_result.reg, false_val);
267       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
268     } else {
269       // Unlikely case - could be tuned.
270       RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class);
271       RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
272       LoadConstant(t_reg1, true_val);
273       LoadConstant(t_reg2, false_val);
274       OpRegImm(kOpCmp, rl_src.reg, 0);
275       LIR* it = OpIT(ccode, "E");
276       OpRegCopy(rl_result.reg, t_reg1);
277       OpRegCopy(rl_result.reg, t_reg2);
278       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
279     }
280   } else {
281     // MOVE case
282     RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
283     RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
284     rl_true = LoadValue(rl_true, result_reg_class);
285     rl_false = LoadValue(rl_false, result_reg_class);
286     rl_result = EvalLoc(rl_dest, result_reg_class, true);
287     OpRegImm(kOpCmp, rl_src.reg, 0);
288     LIR* it = nullptr;
289     if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
290       it = OpIT(NegateComparison(ccode), "");
291       OpRegCopy(rl_result.reg, rl_false.reg);
292     } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
293       it = OpIT(ccode, "");
294       OpRegCopy(rl_result.reg, rl_true.reg);
295     } else {  // Normal - select between the two.
296       it = OpIT(ccode, "E");
297       OpRegCopy(rl_result.reg, rl_true.reg);
298       OpRegCopy(rl_result.reg, rl_false.reg);
299     }
300     OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
301   }
302   StoreValue(rl_dest, rl_result);
303 }
304 
305 void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
306   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
307   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
308   // Normalize such that if either operand is constant, src2 will be constant.
309   ConditionCode ccode = mir->meta.ccode;
310   if (rl_src1.is_const) {
311     std::swap(rl_src1, rl_src2);
312     ccode = FlipComparisonOrder(ccode);
313   }
314   if (rl_src2.is_const) {
315     rl_src2 = UpdateLocWide(rl_src2);
316     // Do special compare/branch against simple const operand if not already in registers.
317     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
318     if ((rl_src2.location != kLocPhysReg) &&
319         ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
320       GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
321       return;
322     }
323   }
324   LIR* taken = &block_label_list_[bb->taken];
325   LIR* not_taken = &block_label_list_[bb->fall_through];
326   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
327   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
328   OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
329   switch (ccode) {
330     case kCondEq:
331       OpCondBranch(kCondNe, not_taken);
332       break;
333     case kCondNe:
334       OpCondBranch(kCondNe, taken);
335       break;
336     case kCondLt:
337       OpCondBranch(kCondLt, taken);
338       OpCondBranch(kCondGt, not_taken);
339       ccode = kCondUlt;
340       break;
341     case kCondLe:
342       OpCondBranch(kCondLt, taken);
343       OpCondBranch(kCondGt, not_taken);
344       ccode = kCondLs;
345       break;
346     case kCondGt:
347       OpCondBranch(kCondGt, taken);
348       OpCondBranch(kCondLt, not_taken);
349       ccode = kCondHi;
350       break;
351     case kCondGe:
352       OpCondBranch(kCondGt, taken);
353       OpCondBranch(kCondLt, not_taken);
354       ccode = kCondUge;
355       break;
356     default:
357       LOG(FATAL) << "Unexpected ccode: " << ccode;
358   }
359   OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
360   OpCondBranch(ccode, taken);
361 }
362 
363 /*
364  * Generate a register comparison to an immediate and branch.  Caller
365  * is responsible for setting branch target field.
366  */
367 LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
368   LIR* branch = nullptr;
369   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
370   /*
371    * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
372    * compare-and-branch if zero is ideal if it will reach.  However, because null checks
373    * branch forward to a slow path, they will frequently not reach - and thus have to
374    * be converted to a long form during assembly (which will trigger another assembly
375    * pass).  Here we estimate the branch distance for checks, and if large directly
376    * generate the long form in an attempt to avoid an extra assembly pass.
377    * TODO: consider interspersing slowpaths in code following unconditional branches.
378    */
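  // For example, a null check whose slow path is nearby can be a single "cbz rN, <target>";
  // when the throw target is likely out of cbz range, the code below skips cbz and emits
  // "cmp rN, #0" followed by a conditional branch instead.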
379   bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
380   skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
381   if (!skip && reg.Low8() && (check_value == 0)) {
382     if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
383       branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
384                        reg.GetReg(), 0);
385     } else if (arm_cond == kArmCondLs) {
386       // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
387       // This case happens for a bounds check of array[0].
388       branch = NewLIR2(kThumb2Cbz, reg.GetReg(), 0);
389     }
390   }
391 
392   if (branch == nullptr) {
393     OpRegImm(kOpCmp, reg, check_value);
394     branch = NewLIR2(kThumbBCond, 0, arm_cond);
395   }
396 
397   branch->target = target;
398   return branch;
399 }
400 
401 LIR* ArmMir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
402   LIR* res;
403   int opcode;
404   // If src or dest is a pair, we'll be using low reg.
405   if (r_dest.IsPair()) {
406     r_dest = r_dest.GetLow();
407   }
408   if (r_src.IsPair()) {
409     r_src = r_src.GetLow();
410   }
411   if (r_dest.IsFloat() || r_src.IsFloat())
412     return OpFpRegCopy(r_dest, r_src);
413   if (r_dest.Low8() && r_src.Low8())
414     opcode = kThumbMovRR;
415   else if (!r_dest.Low8() && !r_src.Low8())
416      opcode = kThumbMovRR_H2H;
417   else if (r_dest.Low8())
418      opcode = kThumbMovRR_H2L;
419   else
420      opcode = kThumbMovRR_L2H;
421   res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
422   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
423     res->flags.is_nop = true;
424   }
425   return res;
426 }
427 
428 void ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
429   if (r_dest != r_src) {
430     LIR* res = OpRegCopyNoInsert(r_dest, r_src);
431     AppendLIR(res);
432   }
433 }
434 
435 void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
436   if (r_dest != r_src) {
437     bool dest_fp = r_dest.IsFloat();
438     bool src_fp = r_src.IsFloat();
439     DCHECK(r_dest.Is64Bit());
440     DCHECK(r_src.Is64Bit());
441     if (dest_fp) {
442       if (src_fp) {
443         OpRegCopy(r_dest, r_src);
444       } else {
445         NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg());
446       }
447     } else {
448       if (src_fp) {
449         NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
450       } else {
451         // Handle overlap
452         if (r_src.GetHighReg() == r_dest.GetLowReg()) {
453           DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
454           OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
455           OpRegCopy(r_dest.GetLow(), r_src.GetLow());
456         } else {
457           OpRegCopy(r_dest.GetLow(), r_src.GetLow());
458           OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
459         }
460       }
461     }
462   }
463 }
464 
465 // Table of magic divisors
466 struct MagicTable {
467   uint32_t magic;
468   uint32_t shift;
469   DividePattern pattern;
470 };
471 
472 static const MagicTable magic_table[] = {
473   {0, 0, DivideNone},        // 0
474   {0, 0, DivideNone},        // 1
475   {0, 0, DivideNone},        // 2
476   {0x55555556, 0, Divide3},  // 3
477   {0, 0, DivideNone},        // 4
478   {0x66666667, 1, Divide5},  // 5
479   {0x2AAAAAAB, 0, Divide3},  // 6
480   {0x92492493, 2, Divide7},  // 7
481   {0, 0, DivideNone},        // 8
482   {0x38E38E39, 1, Divide5},  // 9
483   {0x66666667, 2, Divide5},  // 10
484   {0x2E8BA2E9, 1, Divide5},  // 11
485   {0x2AAAAAAB, 1, Divide5},  // 12
486   {0x4EC4EC4F, 2, Divide5},  // 13
487   {0x92492493, 3, Divide7},  // 14
488   {0x88888889, 3, Divide7},  // 15
489 };
490 
491 // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
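// For reference, the lit == 3 (Divide3) case below computes the quotient as
// hi32(0x55555556 * src) - (src >> 31) using smull plus one subtract; the Divide5 and
// Divide7 patterns differ only in the final correction sequence.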
492 bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
493                                     RegLocation rl_src, RegLocation rl_dest, int lit) {
494   if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
495     return false;
496   }
497   DividePattern pattern = magic_table[lit].pattern;
498   if (pattern == DivideNone) {
499     return false;
500   }
501 
502   RegStorage r_magic = AllocTemp();
503   LoadConstant(r_magic, magic_table[lit].magic);
504   rl_src = LoadValue(rl_src, kCoreReg);
505   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
506   RegStorage r_hi = AllocTemp();
507   RegStorage r_lo = AllocTemp();
508 
509   // rl_dest and rl_src might overlap.
510   // Reuse r_hi to save the div result for the remainder case.
511   RegStorage r_div_result = is_div ? rl_result.reg : r_hi;
512 
513   NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
514   switch (pattern) {
515     case Divide3:
516       OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31));
517       break;
518     case Divide5:
519       OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
520       OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
521                        EncodeShift(kArmAsr, magic_table[lit].shift));
522       break;
523     case Divide7:
524       OpRegReg(kOpAdd, r_hi, rl_src.reg);
525       OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
526       OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
527                        EncodeShift(kArmAsr, magic_table[lit].shift));
528       break;
529     default:
530       LOG(FATAL) << "Unexpected pattern: " << pattern;
531   }
532 
533   if (!is_div) {
534     // div_result = src / lit
535     // tmp1 = div_result * lit
536     // dest = src - tmp1
537     RegStorage tmp1 = r_lo;
538     EasyMultiplyOp ops[2];
539 
540     bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops);
541     DCHECK_NE(canEasyMultiply, false);
542 
543     GenEasyMultiplyTwoOps(tmp1, r_div_result, ops);
544     OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1);
545   }
546 
547   StoreValue(rl_dest, rl_result);
548   return true;
549 }
550 
551 // Try to convert a multiply by lit into a single RegRegRegShift/RegRegShift operation.
552 bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) {
553   if (IsPowerOfTwo(lit)) {
554     op->op = kOpLsl;
555     op->shift = LowestSetBit(lit);
556     return true;
557   }
558 
559   if (IsPowerOfTwo(lit - 1)) {
560     op->op = kOpAdd;
561     op->shift = LowestSetBit(lit - 1);
562     return true;
563   }
564 
565   if (IsPowerOfTwo(lit + 1)) {
566     op->op = kOpRsub;
567     op->shift = LowestSetBit(lit + 1);
568     return true;
569   }
570 
571   op->op = kOpInvalid;
572   op->shift = 0;
573   return false;
574 }
575 
576 // Try to convert a multiply by lit into one or two RegRegRegShift/RegRegShift operations.
577 bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
578   if (GetEasyMultiplyOp(lit, &ops[0])) {
580     ops[1].op = kOpInvalid;
581     ops[1].shift = 0;
582     return true;
583   }
584 
585   int lit1 = lit;
586   uint32_t shift = LowestSetBit(lit1);
587   if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
588     ops[1].op = kOpLsl;
589     ops[1].shift = shift;
590     return true;
591   }
592 
593   lit1 = lit - 1;
594   shift = LowestSetBit(lit1);
595   if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
596     ops[1].op = kOpAdd;
597     ops[1].shift = shift;
598     return true;
599   }
600 
601   lit1 = lit + 1;
602   shift = LowestSetBit(lit1);
603   if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
604     ops[1].op = kOpRsub;
605     ops[1].shift = shift;
606     return true;
607   }
608 
609   return false;
610 }
611 
612 // Generate instructions to do the multiply.
613 // An additional temporary register is required
614 // if two instructions are needed and src/dest overlap.
615 void ArmMir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) {
616   // tmp1 = ( src << shift1) + [ src | -src | 0 ]
617   // dest = (tmp1 << shift2) + [ src | -src | 0 ]
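  // Example: for lit == 10, GetEasyMultiplyTwoOps yields ops[0] == {kOpAdd, 2} and
  // ops[1] == {kOpLsl, 1}, so tmp1 = src + (src << 2) = 5 * src and dest = tmp1 << 1.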
618 
619   RegStorage r_tmp1;
620   if (ops[1].op == kOpInvalid) {
621     r_tmp1 = r_dest;
622   } else if (r_dest.GetReg() != r_src.GetReg()) {
623     r_tmp1 = r_dest;
624   } else {
625     r_tmp1 = AllocTemp();
626   }
627 
628   switch (ops[0].op) {
629     case kOpLsl:
630       OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift);
631       break;
632     case kOpAdd:
633       OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
634       break;
635     case kOpRsub:
636       OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
637       break;
638     default:
639       DCHECK_EQ(ops[0].op, kOpInvalid);
640       break;
641   }
642 
643   switch (ops[1].op) {
644     case kOpInvalid:
645       return;
646     case kOpLsl:
647       OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift);
648       break;
649     case kOpAdd:
650       OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
651       break;
652     case kOpRsub:
653       OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
654       break;
655     default:
656       LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps";
657       break;
658   }
659 }
660 
661 bool ArmMir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
662   EasyMultiplyOp ops[2];
663 
664   if (!GetEasyMultiplyTwoOps(lit, ops)) {
665     return false;
666   }
667 
668   rl_src = LoadValue(rl_src, kCoreReg);
669   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
670 
671   GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops);
672   StoreValue(rl_dest, rl_result);
673   return true;
674 }
675 
676 RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
677                       RegLocation rl_src2, bool is_div, bool check_zero) {
678   LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
679   return rl_dest;
680 }
681 
682 RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
683   LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
684   return rl_dest;
685 }
686 
687 RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
688   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
689 
690   // Put the literal in a temp.
691   RegStorage lit_temp = AllocTemp();
692   LoadConstant(lit_temp, lit);
693   // Use the generic case for div/rem with arg2 in a register.
694   // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
695   rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
696   FreeTemp(lit_temp);
697 
698   return rl_result;
699 }
700 
701 RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
702                                   bool is_div) {
703   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
704   if (is_div) {
705     // Simple case, use sdiv instruction.
706     OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2);
707   } else {
708     // Remainder case, use the following code:
709     // temp = reg1 / reg2      - integer division
710     // temp = temp * reg2
711     // dest = reg1 - temp
712 
713     RegStorage temp = AllocTemp();
714     OpRegRegReg(kOpDiv, temp, reg1, reg2);
715     OpRegReg(kOpMul, temp, reg2);
716     OpRegRegReg(kOpSub, rl_result.reg, reg1, temp);
717     FreeTemp(temp);
718   }
719 
720   return rl_result;
721 }
722 
723 bool ArmMir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
724   DCHECK_EQ(cu_->instruction_set, kThumb2);
725   if (is_long) {
726     return false;
727   }
728   RegLocation rl_src1 = info->args[0];
729   RegLocation rl_src2 = info->args[1];
730   rl_src1 = LoadValue(rl_src1, kCoreReg);
731   rl_src2 = LoadValue(rl_src2, kCoreReg);
732   RegLocation rl_dest = InlineTarget(info);
733   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
734   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
735   LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E");
736   OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
737   OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
738   OpEndIT(it);
739   StoreValue(rl_dest, rl_result);
740   return true;
741 }
742 
743 bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
744   RegLocation rl_src_address = info->args[0];  // long address
745   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
746   RegLocation rl_dest = InlineTarget(info);
747   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
748   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
749   if (size == k64) {
750     // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
751     if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
752       Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
753       Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
754     } else {
755       Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
756       Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
757     }
758     StoreValueWide(rl_dest, rl_result);
759   } else {
760     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
761     // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
762     LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
763     StoreValue(rl_dest, rl_result);
764   }
765   return true;
766 }
767 
768 bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
769   RegLocation rl_src_address = info->args[0];  // long address
770   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
771   RegLocation rl_src_value = info->args[2];  // [size] value
772   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
773   if (size == k64) {
774     // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
775     RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
776     StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32, kNotVolatile);
777     StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32, kNotVolatile);
778   } else {
779     DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
781     // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
781     RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
782     StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
783   }
784   return true;
785 }
786 
787 // Generate a CAS with memory_order_seq_cst semantics.
788 bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
789   DCHECK_EQ(cu_->instruction_set, kThumb2);
790   // Unused - RegLocation rl_src_unsafe = info->args[0];
791   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
792   RegLocation rl_src_offset = info->args[2];  // long low
793   rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
794   RegLocation rl_src_expected = info->args[4];  // int, long or Object
795   // If is_long, high half is in info->args[5]
796   RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
797   // If is_long, high half is in info->args[7]
798   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
799 
800   // We have only 5 temporary registers available and actually only 4 if the InlineTarget
801   // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
802   // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor
803   // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
804   // into the same temps, reducing the number of required temps down to 5. We shall work
805   // around the potentially locked temp by using LR for r_ptr, unconditionally.
806   // TODO: Pass information about the need for more temps to the stack frame generation
807   // code so that we can rely on being able to allocate enough temps.
808   DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
809   MarkTemp(rs_rARM_LR);
810   FreeTemp(rs_rARM_LR);
811   LockTemp(rs_rARM_LR);
812   bool load_early = true;
813   if (is_long) {
814     RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
815         rl_src_expected.reg;
816     RegStorage new_val_reg = rl_src_new_value.reg.IsPair() ? rl_src_new_value.reg.GetLow() :
817         rl_src_new_value.reg;
818     bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !expected_reg.IsFloat();
819     bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !new_val_reg.IsFloat();
820     bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
821     bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);
822 
823     if (!expected_is_good_reg && !new_value_is_good_reg) {
824       // None of expected/new_value is non-temp reg, need to load both late
825       load_early = false;
826       // Make sure they are not in the temp regs and the load will not be skipped.
827       if (expected_is_core_reg) {
828         FlushRegWide(rl_src_expected.reg);
829         ClobberSReg(rl_src_expected.s_reg_low);
830         ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
831         rl_src_expected.location = kLocDalvikFrame;
832       }
833       if (new_value_is_core_reg) {
834         FlushRegWide(rl_src_new_value.reg);
835         ClobberSReg(rl_src_new_value.s_reg_low);
836         ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
837         rl_src_new_value.location = kLocDalvikFrame;
838       }
839     }
840   }
841 
842   // Prevent reordering with prior memory operations.
843   GenMemBarrier(kAnyStore);
844 
845   RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
846   RegLocation rl_new_value;
847   if (!is_long) {
848     rl_new_value = LoadValue(rl_src_new_value);
849   } else if (load_early) {
850     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
851   }
852 
853   if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
854     // Mark card for object assuming new value is stored.
855     MarkGCCard(rl_new_value.reg, rl_object.reg);
856   }
857 
858   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
859 
860   RegStorage r_ptr = rs_rARM_LR;
861   OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
862 
863   // Free now unneeded rl_object and rl_offset to give more temps.
864   ClobberSReg(rl_object.s_reg_low);
865   FreeTemp(rl_object.reg);
866   ClobberSReg(rl_offset.s_reg_low);
867   FreeTemp(rl_offset.reg);
868 
869   RegLocation rl_expected;
870   if (!is_long) {
871     rl_expected = LoadValue(rl_src_expected);
872   } else if (load_early) {
873     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
874   } else {
875     // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
876     RegStorage low_reg = AllocTemp();
877     RegStorage high_reg = AllocTemp();
878     rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg);
879     rl_expected = rl_new_value;
880   }
881 
882   // do {
883   //   tmp = [r_ptr] - expected;
884   // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
885   // result = tmp != 0;
886 
887   RegStorage r_tmp = AllocTemp();
888   LIR* target = NewLIR0(kPseudoTargetLabel);
889 
890   LIR* it = nullptr;
891   if (is_long) {
892     RegStorage r_tmp_high = AllocTemp();
893     if (!load_early) {
894       LoadValueDirectWide(rl_src_expected, rl_expected.reg);
895     }
896     NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
897     OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
898     OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
899     if (!load_early) {
900       LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
901     }
902     // Make sure we use ORR that sets the ccode
903     if (r_tmp.Low8() && r_tmp_high.Low8()) {
904       NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
905     } else {
906       NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
907     }
908     FreeTemp(r_tmp_high);  // Now unneeded
909 
910     DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
911     it = OpIT(kCondEq, "T");
912     NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
913 
914   } else {
915     NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
916     OpRegReg(kOpSub, r_tmp, rl_expected.reg);
917     DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
918     it = OpIT(kCondEq, "T");
919     NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
920   }
921 
922   // Still one conditional left from OpIT(kCondEq, "T") from either branch
923   OpRegImm(kOpCmp /* eq */, r_tmp, 1);
924   OpEndIT(it);
925 
926   OpCondBranch(kCondEq, target);
927 
928   if (!load_early) {
929     FreeTemp(rl_expected.reg);  // Now unneeded.
930   }
931 
932   // Prevent reordering with subsequent memory operations.
933   GenMemBarrier(kLoadAny);
934 
935   // result := (tmp1 != 0) ? 0 : 1;
936   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
937   OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
938   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
939   it = OpIT(kCondUlt, "");
940   LoadConstant(rl_result.reg, 0); /* cc */
941   FreeTemp(r_tmp);  // Now unneeded.
942   OpEndIT(it);     // Barrier to terminate OpIT.
943 
944   StoreValue(rl_dest, rl_result);
945 
946   // Now, restore lr to its non-temp status.
947   Clobber(rs_rARM_LR);
948   UnmarkTemp(rs_rARM_LR);
949   return true;
950 }
951 
952 bool ArmMir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
953   constexpr int kLargeArrayThreshold = 256;
954 
955   RegLocation rl_src = info->args[0];
956   RegLocation rl_src_pos = info->args[1];
957   RegLocation rl_dst = info->args[2];
958   RegLocation rl_dst_pos = info->args[3];
959   RegLocation rl_length = info->args[4];
960   // Compile-time check; let the non-inlined method handle the exception to reduce the associated metadata.
961   if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
962       (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
963       (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
964     return false;
965   }
966 
967   ClobberCallerSave();
968   LockCallTemps();  // Prepare for explicit register usage.
969   LockTemp(rs_r12);
970   RegStorage rs_src = rs_r0;
971   RegStorage rs_dst = rs_r1;
972   LoadValueDirectFixed(rl_src, rs_src);
973   LoadValueDirectFixed(rl_dst, rs_dst);
974 
975   // Handle null pointer exception in slow-path.
976   LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
977   LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
978   // Handle potential overlapping in slow-path.
979   LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
980   // Handle exception or big length in slow-path.
981   RegStorage rs_length = rs_r2;
982   LoadValueDirectFixed(rl_length, rs_length);
983   LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
984   // Src bounds check.
985   RegStorage rs_pos = rs_r3;
986   RegStorage rs_arr_length = rs_r12;
987   LoadValueDirectFixed(rl_src_pos, rs_pos);
988   LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
989   Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
990   OpRegReg(kOpSub, rs_arr_length, rs_pos);
991   LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
992   // Dst bounds check.
993   LoadValueDirectFixed(rl_dst_pos, rs_pos);
994   LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
995   Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
996   OpRegReg(kOpSub, rs_arr_length, rs_pos);
997   LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
998 
999   // Everything is checked now.
1000   OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
1001   OpRegReg(kOpAdd, rs_dst, rs_pos);
1002   OpRegReg(kOpAdd, rs_dst, rs_pos);
1003   OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
1004   LoadValueDirectFixed(rl_src_pos, rs_pos);
1005   OpRegReg(kOpAdd, rs_src, rs_pos);
1006   OpRegReg(kOpAdd, rs_src, rs_pos);
1007 
1008   RegStorage rs_tmp = rs_pos;
1009   OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
1010 
1011   // Copy one element.
1012   OpRegRegImm(kOpAnd, rs_tmp, rs_length, 2);
1013   LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
1014   OpRegImm(kOpSub, rs_length, 2);
1015   LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
1016   StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
1017 
1018   // Copy two elements.
1019   LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
1020   LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
1021   OpRegImm(kOpSub, rs_length, 4);
1022   LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
1023   StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
1024   OpUnconditionalBranch(begin_loop);
1025 
1026   LIR *check_failed = NewLIR0(kPseudoTargetLabel);
1027   LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
1028   LIR* return_point = NewLIR0(kPseudoTargetLabel);
1029 
1030   src_check_branch->target = check_failed;
1031   dst_check_branch->target = check_failed;
1032   src_dst_same->target = check_failed;
1033   len_neg_or_too_big->target = check_failed;
1034   src_pos_negative->target = check_failed;
1035   src_bad_len->target = check_failed;
1036   dst_pos_negative->target = check_failed;
1037   dst_bad_len->target = check_failed;
1038   jmp_to_begin_loop->target = begin_loop;
1039   jmp_to_ret->target = return_point;
1040 
1041   AddIntrinsicSlowPath(info, launchpad_branch, return_point);
1042   ClobberCallerSave();  // We must clobber everything because slow path will return here
1043 
1044   return true;
1045 }
1046 
1047 LIR* ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
1048   return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
1049 }
1050 
1051 LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
1052   return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
1053 }
1054 
1055 LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) {
1056   return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
1057 }
1058 
1059 void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
1060                                                RegLocation rl_result, int lit,
1061                                                int first_bit, int second_bit) {
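  // Computes (src + (src << (second_bit - first_bit))) << first_bit, i.e.
  // src * (2^first_bit + 2^second_bit); e.g. first_bit == 1, second_bit == 2 gives src * 6.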
1062   OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
1063                    EncodeShift(kArmLsl, second_bit - first_bit));
1064   if (first_bit != 0) {
1065     OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1066   }
1067 }
1068 
1069 void ArmMir2Lir::GenDivZeroCheckWide(RegStorage reg) {
1070   DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
1071   RegStorage t_reg = AllocTemp();
1072   NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
1073   FreeTemp(t_reg);
1074   GenDivZeroCheck(kCondEq);
1075 }
1076 
1077 // Test suspend flag, return target of taken suspend branch
1078 LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
1079 #ifdef ARM_R4_SUSPEND_FLAG
1080   NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
1081   return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
1082 #else
1083   RegStorage t_reg = AllocTemp();
1084   LoadBaseDisp(rs_rARM_SELF, Thread::ThreadFlagsOffset<4>().Int32Value(),
1085     t_reg, kUnsignedHalf);
1086   LIR* cmp_branch = OpCmpImmBranch((target == NULL) ? kCondNe : kCondEq, t_reg,
1087     0, target);
1088   FreeTemp(t_reg);
1089   return cmp_branch;
1090 #endif
1091 }
1092 
1093 // Decrement register and branch on condition
1094 LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1095   // Combine sub & test using sub setflags encoding here
1096   OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
1097   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
1098   return OpCondBranch(c_code, target);
1099 }
1100 
1101 bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
1102 #if ANDROID_SMP != 0
1103   // Start off by treating the last LIR as the barrier. If it is not sufficient, generate a new one.
1104   LIR* barrier = last_lir_insn_;
1105 
1106   int dmb_flavor;
1107   // TODO: revisit Arm barrier kinds
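  // For reference: "dmb ish" is a full barrier within the inner-shareable domain, while
  // "dmb ishst" only orders stores against later stores, which is why it suffices for
  // kStoreStore.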
1108   switch (barrier_kind) {
1109     case kAnyStore: dmb_flavor = kISH; break;
1110     case kLoadAny: dmb_flavor = kISH; break;
1111     case kStoreStore: dmb_flavor = kISHST; break;
1112     case kAnyAny: dmb_flavor = kISH; break;
1113     default:
1114       LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
1115       dmb_flavor = kSY;  // quiet gcc.
1116       break;
1117   }
1118 
1119   bool ret = false;
1120 
1121   // If the same barrier already exists, don't generate another.
1122   if (barrier == nullptr || barrier->opcode != kThumb2Dmb
1123       || barrier->operands[0] != dmb_flavor) {
1124     barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
1125     ret = true;
1126   }
1127 
1128   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
1129   DCHECK(!barrier->flags.use_def_invalid);
1130   barrier->u.m.def_mask = &kEncodeAll;
1131   return ret;
1132 #else
1133   return false;
1134 #endif
1135 }
1136 
1137 void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
1138   rl_src = LoadValueWide(rl_src, kCoreReg);
1139   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1140   RegStorage z_reg = AllocTemp();
1141   LoadConstantNoClobber(z_reg, 0);
1142   // Check for destructive overlap
1143   if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1144     RegStorage t_reg = AllocTemp();
1145     OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
1146     OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
1147     FreeTemp(t_reg);
1148   } else {
1149     OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
1150     OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
1151   }
1152   FreeTemp(z_reg);
1153   StoreValueWide(rl_dest, rl_result);
1154 }
1155 
1156 void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
1157                             RegLocation rl_src1, RegLocation rl_src2) {
1158     /*
1159      * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
1160      * dest     = src1.lo * src2.lo;
1161      * tmp1    += src1.lo * src2.hi;
1162      * dest.hi += tmp1;
1163      *
1164      * To pull off inline multiply, we have a worst-case requirement of 7 temporary
1165      * registers.  Normally for Arm, we get 5.  We can get to 6 by including
1166      * lr in the temp set.  The only problematic case is all operands and result are
1167      * distinct, and none have been promoted.  In that case, we can succeed by aggressively
1168      * freeing operand temp registers after they are no longer needed.  All other cases
1169      * can proceed normally.  We'll just punt on the case of the result having a misaligned
1170      * overlap with either operand and send that case to a runtime handler.
1171      */
1172     RegLocation rl_result;
1173     if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) {
1174       FlushAllRegs();
1175       CallRuntimeHelperRegLocationRegLocation(kQuickLmul, rl_src1, rl_src2, false);
1176       rl_result = GetReturnWide(kCoreReg);
1177       StoreValueWide(rl_dest, rl_result);
1178       return;
1179     }
1180 
1181     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1182     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1183 
1184     int reg_status = 0;
1185     RegStorage res_lo;
1186     RegStorage res_hi;
1187     bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
1188         !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
1189     bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
1190     bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
1191     // Check if rl_dest is *not* either operand and we have enough temp registers.
1192     if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
1193         (dest_promoted || src1_promoted || src2_promoted)) {
1194       // In this case, we do not need to manually allocate temp registers for result.
1195       rl_result = EvalLoc(rl_dest, kCoreReg, true);
1196       res_lo = rl_result.reg.GetLow();
1197       res_hi = rl_result.reg.GetHigh();
1198     } else {
1199       res_lo = AllocTemp();
1200       if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
1201         // In this case, we have enough temp registers to allocate for the result.
1202         res_hi = AllocTemp();
1203         reg_status = 1;
1204       } else {
1205         // In this case, all temps are now allocated.
1206         // res_hi will be allocated after we can free src1_hi.
1207         reg_status = 2;
1208       }
1209     }
1210 
1211     // Temporarily add LR to the temp pool, and assign it to tmp1
1212     MarkTemp(rs_rARM_LR);
1213     FreeTemp(rs_rARM_LR);
1214     RegStorage tmp1 = rs_rARM_LR;
1215     LockTemp(rs_rARM_LR);
1216 
1217     if (rl_src1.reg == rl_src2.reg) {
1218       DCHECK(res_hi.Valid());
1219       DCHECK(res_lo.Valid());
1220       NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
1221       NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
1222               rl_src1.reg.GetLowReg());
1223       OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
1224     } else {
1225       NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
1226       if (reg_status == 2) {
1227         DCHECK(!res_hi.Valid());
1228         DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
1229         DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
1230         // Will force free src1_hi, so must clobber.
1231         Clobber(rl_src1.reg);
1232         FreeTemp(rl_src1.reg.GetHigh());
1233         res_hi = AllocTemp();
1234       }
1235       DCHECK(res_hi.Valid());
1236       DCHECK(res_lo.Valid());
1237       NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
1238               rl_src1.reg.GetLowReg());
1239       NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
1240               tmp1.GetReg());
1241       NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
1242       if (reg_status == 2) {
1243         FreeTemp(rl_src1.reg.GetLow());
1244       }
1245     }
1246 
1247     // Now, restore lr to its non-temp status.
1248     FreeTemp(tmp1);
1249     Clobber(rs_rARM_LR);
1250     UnmarkTemp(rs_rARM_LR);
1251 
1252     if (reg_status != 0) {
1253       // We had manually allocated registers for rl_result.
1254       // Now construct a RegLocation.
1255       rl_result = GetReturnWide(kCoreReg);  // Just using as a template.
1256       rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
1257     }
1258 
1259     StoreValueWide(rl_dest, rl_result);
1260 }
1261 
1262 void ArmMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1263                                 RegLocation rl_src2) {
1264   switch (opcode) {
1265     case Instruction::MUL_LONG:
1266     case Instruction::MUL_LONG_2ADDR:
1267       GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
1268       return;
1269     case Instruction::NEG_LONG:
1270       GenNegLong(rl_dest, rl_src2);
1271       return;
1272 
1273     default:
1274       break;
1275   }
1276 
1277   // Fallback for all other ops.
1278   Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1279 }
1280 
1281 /*
1282  * Generate array load
1283  */
1284 void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
1285                              RegLocation rl_index, RegLocation rl_dest, int scale) {
1286   RegisterClass reg_class = RegClassBySize(size);
1287   int len_offset = mirror::Array::LengthOffset().Int32Value();
1288   int data_offset;
1289   RegLocation rl_result;
1290   bool constant_index = rl_index.is_const;
1291   rl_array = LoadValue(rl_array, kRefReg);
1292   if (!constant_index) {
1293     rl_index = LoadValue(rl_index, kCoreReg);
1294   }
1295 
1296   if (rl_dest.wide) {
1297     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1298   } else {
1299     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1300   }
1301 
1302   // If index is constant, just fold it into the data offset
1303   if (constant_index) {
1304     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
1305   }
1306 
1307   /* null object? */
1308   GenNullCheck(rl_array.reg, opt_flags);
1309 
1310   bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
1311   RegStorage reg_len;
1312   if (needs_range_check) {
1313     reg_len = AllocTemp();
1314     /* Get len */
1315     Load32Disp(rl_array.reg, len_offset, reg_len);
1316     MarkPossibleNullPointerException(opt_flags);
1317   } else {
1318     ForceImplicitNullCheck(rl_array.reg, opt_flags);
1319   }
1320   if (rl_dest.wide || rl_dest.fp || constant_index) {
1321     RegStorage reg_ptr;
1322     if (constant_index) {
1323       reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
1324     } else {
1325       // No special indexed operation, lea + load w/ displacement
1326       reg_ptr = AllocTempRef();
1327       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
1328       FreeTemp(rl_index.reg);
1329     }
1330     rl_result = EvalLoc(rl_dest, reg_class, true);
1331 
1332     if (needs_range_check) {
1333       if (constant_index) {
1334         GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1335       } else {
1336         GenArrayBoundsCheck(rl_index.reg, reg_len);
1337       }
1338       FreeTemp(reg_len);
1339     }
1340     LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile);
1341     MarkPossibleNullPointerException(opt_flags);
1342     if (!constant_index) {
1343       FreeTemp(reg_ptr);
1344     }
1345     if (rl_dest.wide) {
1346       StoreValueWide(rl_dest, rl_result);
1347     } else {
1348       StoreValue(rl_dest, rl_result);
1349     }
1350   } else {
1351     // Offset base, then use indexed load
1352     RegStorage reg_ptr = AllocTempRef();
1353     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1354     FreeTemp(rl_array.reg);
1355     rl_result = EvalLoc(rl_dest, reg_class, true);
1356 
1357     if (needs_range_check) {
1358       GenArrayBoundsCheck(rl_index.reg, reg_len);
1359       FreeTemp(reg_len);
1360     }
1361     LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
1362     MarkPossibleNullPointerException(opt_flags);
1363     FreeTemp(reg_ptr);
1364     StoreValue(rl_dest, rl_result);
1365   }
1366 }
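
// Editor's illustrative sketch (not generated code; the helper name is made
// up): the two addressing strategies used in GenArrayGet above, written as
// plain integer arithmetic.  With a constant index the element offset
//   data_offset + (index << scale)
// is known at compile time and folds into the load's displacement; otherwise
// the scaled index is added to the array base first and the load uses
// data_offset as the displacement.
static inline uint32_t ExampleArrayElementAddress(uint32_t array_base,
                                                  int32_t data_offset,
                                                  int32_t index, int scale,
                                                  bool constant_index) {
  if (constant_index) {
    // Fold (index << scale) into the displacement: one load, no extra temp.
    return array_base + data_offset + (index << scale);
  }
  // Shifted add (OpRegRegRegShift above), then a displacement load.
  uint32_t reg_ptr = array_base + (static_cast<uint32_t>(index) << scale);
  return reg_ptr + data_offset;
}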
1367 
1368 /*
1369  * Generate array store
1370  *
1371  */
1372 void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
1373                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
1374   RegisterClass reg_class = RegClassBySize(size);
1375   int len_offset = mirror::Array::LengthOffset().Int32Value();
1376   bool constant_index = rl_index.is_const;
1377 
1378   int data_offset;
1379   if (size == k64 || size == kDouble) {
1380     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
1381   } else {
1382     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
1383   }
1384 
1385   // If index is constant, just fold it into the data offset.
1386   if (constant_index) {
1387     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
1388   }
1389 
1390   rl_array = LoadValue(rl_array, kRefReg);
1391   if (!constant_index) {
1392     rl_index = LoadValue(rl_index, kCoreReg);
1393   }
1394 
1395   RegStorage reg_ptr;
1396   bool allocated_reg_ptr_temp = false;
1397   if (constant_index) {
1398     reg_ptr = rl_array.reg;
1399   } else if (IsTemp(rl_array.reg) && !card_mark) {
1400     Clobber(rl_array.reg);
1401     reg_ptr = rl_array.reg;
1402   } else {
1403     allocated_reg_ptr_temp = true;
1404     reg_ptr = AllocTempRef();
1405   }
1406 
1407   /* null object? */
1408   GenNullCheck(rl_array.reg, opt_flags);
1409 
1410   bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
1411   RegStorage reg_len;
1412   if (needs_range_check) {
1413     reg_len = AllocTemp();
1414     // NOTE: max live temps(4) here.
1415     /* Get len */
1416     Load32Disp(rl_array.reg, len_offset, reg_len);
1417     MarkPossibleNullPointerException(opt_flags);
1418   } else {
1419     ForceImplicitNullCheck(rl_array.reg, opt_flags);
1420   }
1421   /* at this point, reg_ptr points to array, 2 live temps */
1422   if (rl_src.wide || rl_src.fp || constant_index) {
1423     if (rl_src.wide) {
1424       rl_src = LoadValueWide(rl_src, reg_class);
1425     } else {
1426       rl_src = LoadValue(rl_src, reg_class);
1427     }
1428     if (!constant_index) {
1429       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
1430     }
1431     if (needs_range_check) {
1432       if (constant_index) {
1433         GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
1434       } else {
1435         GenArrayBoundsCheck(rl_index.reg, reg_len);
1436       }
1437       FreeTemp(reg_len);
1438     }
1439 
1440     StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
1441     MarkPossibleNullPointerException(opt_flags);
1442   } else {
1443     /* reg_ptr -> array data */
1444     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
1445     rl_src = LoadValue(rl_src, reg_class);
1446     if (needs_range_check) {
1447       GenArrayBoundsCheck(rl_index.reg, reg_len);
1448       FreeTemp(reg_len);
1449     }
1450     StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
1451     MarkPossibleNullPointerException(opt_flags);
1452   }
1453   if (allocated_reg_ptr_temp) {
1454     FreeTemp(reg_ptr);
1455   }
1456   if (card_mark) {
1457     MarkGCCard(rl_src.reg, rl_array.reg);
1458   }
1459 }
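
// Editor's illustrative sketch (not generated code): when card_mark is set,
// MarkGCCard() above emits the classic card-table write barrier so the GC
// knows to rescan the array after a reference was stored into it.  The
// constants below (card shift and dirty value) are assumptions for
// illustration only; the emitted barrier normally branches over the card
// store when the stored value is null.
static inline void ExampleMarkGCCard(uint8_t* card_table_base,
                                     uint32_t stored_value,
                                     uint32_t array_address) {
  if (stored_value == 0) {
    return;  // Storing null requires no barrier.
  }
  constexpr int kExampleCardShift = 7;         // Assumed 128-byte cards.
  constexpr uint8_t kExampleCardDirty = 0x70;  // Assumed dirty marker.
  card_table_base[array_address >> kExampleCardShift] = kExampleCardDirty;
}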
1460 
1461 
1462 void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
1463                                    RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
1464   rl_src = LoadValueWide(rl_src, kCoreReg);
1465   // Per spec, we only care about low 6 bits of shift amount.
1466   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
1467   if (shift_amount == 0) {
1468     StoreValueWide(rl_dest, rl_src);
1469     return;
1470   }
1471   if (BadOverlap(rl_src, rl_dest)) {
1472     GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
1473     return;
1474   }
1475   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1476   switch (opcode) {
1477     case Instruction::SHL_LONG:
1478     case Instruction::SHL_LONG_2ADDR:
1479       if (shift_amount == 1) {
1480         OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
1481         OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
1482       } else if (shift_amount == 32) {
1483         OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
1484         LoadConstant(rl_result.reg.GetLow(), 0);
1485       } else if (shift_amount > 31) {
1486         OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
1487         LoadConstant(rl_result.reg.GetLow(), 0);
1488       } else {
1489         OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
1490         OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(),
1491                          EncodeShift(kArmLsr, 32 - shift_amount));
1492         OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
1493       }
1494       break;
1495     case Instruction::SHR_LONG:
1496     case Instruction::SHR_LONG_2ADDR:
1497       if (shift_amount == 32) {
1498         OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
1499         OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
1500       } else if (shift_amount > 31) {
1501         OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
1502         OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
1503       } else {
1504         RegStorage t_reg = AllocTemp();
1505         OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
1506         OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
1507                          EncodeShift(kArmLsl, 32 - shift_amount));
1508         FreeTemp(t_reg);
1509         OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
1510       }
1511       break;
1512     case Instruction::USHR_LONG:
1513     case Instruction::USHR_LONG_2ADDR:
1514       if (shift_amount == 32) {
1515         OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
1516         LoadConstant(rl_result.reg.GetHigh(), 0);
1517       } else if (shift_amount > 31) {
1518         OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
1519         LoadConstant(rl_result.reg.GetHigh(), 0);
1520       } else {
1521         RegStorage t_reg = AllocTemp();
1522         OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
1523         OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
1524                          EncodeShift(kArmLsl, 32 - shift_amount));
1525         FreeTemp(t_reg);
1526         OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
1527       }
1528       break;
1529     default:
1530       LOG(FATAL) << "Unexpected case";
1531   }
1532   StoreValueWide(rl_dest, rl_result);
1533 }
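
// Editor's illustrative sketch (not generated code): the SHL_LONG case above
// splits a 64-bit shift by a constant into 32-bit operations, one branch per
// case the generator handles.  The same decomposition on plain integers,
// assuming 1 <= shift_amount <= 63 as guaranteed by the caller:
static inline void ExampleShlLongByConst(uint32_t src_lo, uint32_t src_hi,
                                         int shift_amount,
                                         uint32_t* out_lo, uint32_t* out_hi) {
  if (shift_amount == 1) {
    // add/adc pair: double the value, carrying out of the low word.
    uint32_t lo = src_lo + src_lo;
    uint32_t carry = (lo < src_lo) ? 1u : 0u;
    *out_lo = lo;
    *out_hi = src_hi + src_hi + carry;
  } else if (shift_amount >= 32) {
    // Low word shifts entirely into the high word (a plain move when == 32).
    *out_hi = src_lo << (shift_amount - 32);
    *out_lo = 0;
  } else {
    // Cross-word case: high keeps its shifted bits plus the bits spilled out of low.
    *out_hi = (src_hi << shift_amount) | (src_lo >> (32 - shift_amount));
    *out_lo = src_lo << shift_amount;
  }
}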
1534 
1535 void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
1536                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
1537   if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
1538     if (!rl_src2.is_const) {
1539       // Don't bother with special handling for subtract from immediate.
1540       GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1541       return;
1542     }
1543   } else {
1544     // Normalize: make rl_src2 the constant operand.
1545     if (!rl_src2.is_const) {
1546       DCHECK(rl_src1.is_const);
1547       std::swap(rl_src1, rl_src2);
1548     }
1549   }
1550   if (BadOverlap(rl_src1, rl_dest)) {
1551     GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1552     return;
1553   }
1554   DCHECK(rl_src2.is_const);
1555   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
1556   uint32_t val_lo = Low32Bits(val);
1557   uint32_t val_hi = High32Bits(val);
1558   int32_t mod_imm_lo = ModifiedImmediate(val_lo);
1559   int32_t mod_imm_hi = ModifiedImmediate(val_hi);
1560 
1561   // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
1562   switch (opcode) {
1563     case Instruction::ADD_LONG:
1564     case Instruction::ADD_LONG_2ADDR:
1565     case Instruction::SUB_LONG:
1566     case Instruction::SUB_LONG_2ADDR:
1567       if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
1568         GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1569         return;
1570       }
1571       break;
1572     default:
1573       break;
1574   }
1575   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1576   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1577   // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
1578   switch (opcode) {
1579     case Instruction::ADD_LONG:
1580     case Instruction::ADD_LONG_2ADDR:
1581       NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
1582       NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
1583       break;
1584     case Instruction::OR_LONG:
1585     case Instruction::OR_LONG_2ADDR:
1586       if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
1587         OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
1588       }
1589       if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
1590         OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
1591       }
1592       break;
1593     case Instruction::XOR_LONG:
1594     case Instruction::XOR_LONG_2ADDR:
1595       OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
1596       OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
1597       break;
1598     case Instruction::AND_LONG:
1599     case Instruction::AND_LONG_2ADDR:
1600       if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
1601         OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
1602       }
1603       if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
1604         OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
1605       }
1606       break;
1607     case Instruction::SUB_LONG_2ADDR:
1608     case Instruction::SUB_LONG:
1609       NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
1610       NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
1611       break;
1612     default:
1613       LOG(FATAL) << "Unexpected opcode " << opcode;
1614   }
1615   StoreValueWide(rl_dest, rl_result);
1616 }
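
// Editor's illustrative sketch (not generated code): the ADD_LONG-with-constant
// path above emits a flag-setting ADD on the low words followed by ADC on the
// high words, so the carry out of the low half feeds the high half (SUB/SBC is
// the analogous borrow chain).  Plain-integer equivalent:
static inline void ExampleAddLongImm(uint32_t src_lo, uint32_t src_hi,
                                     uint32_t imm_lo, uint32_t imm_hi,
                                     uint32_t* out_lo, uint32_t* out_hi) {
  uint32_t lo = src_lo + imm_lo;             // ADDS: produces the carry.
  uint32_t carry = (lo < src_lo) ? 1u : 0u;  // Carry out of the low word.
  *out_lo = lo;
  *out_hi = src_hi + imm_hi + carry;         // ADC: consumes the carry.
}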
1617 
1618 }  // namespace art
1619