1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
/*
 * This file contains codegen for the Thumb ISA and is intended to be
 * included by:
 *
 *        Codegen-$(TARGET_ARCH_VARIANT).c
 *
 */
24
/*
 * Generate code for a float negate: load rlSrc into an FP register,
 * emit kThumb2Vnegs into the FP register chosen for rlDest, and store
 * the result back to rlDest.
 */
static void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest,
                        RegLocation rlSrc)
{
    RegLocation rlOperand = loadValue(cUnit, rlSrc, kFPReg);
    RegLocation rlNegated = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);

    newLIR2(cUnit, kThumb2Vnegs, rlNegated.lowReg, rlOperand.lowReg);
    storeValue(cUnit, rlDest, rlNegated);
}
34
/*
 * Generate code for a double negate: load the wide rlSrc into FP
 * registers, emit kThumb2Vnegd on the register pairs (combined with
 * S2D), and store the wide result back to rlDest.
 */
static void genNegDouble(CompilationUnit *cUnit, RegLocation rlDest,
                         RegLocation rlSrc)
{
    RegLocation rlOperand = loadValueWide(cUnit, rlSrc, kFPReg);
    RegLocation rlNegated = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    int dstDouble = S2D(rlNegated.lowReg, rlNegated.highReg);
    int srcDouble = S2D(rlOperand.lowReg, rlOperand.highReg);

    newLIR2(cUnit, kThumb2Vnegd, dstDouble, srcDouble);
    storeValueWide(cUnit, rlDest, rlNegated);
}
45
46 /*
47 * To avoid possible conflicts, we use a lot of temps here. Note that
48 * our usage of Thumb2 instruction forms avoids the problems with register
49 * reuse for multiply instructions prior to arm6.
50 */
genMulLong(CompilationUnit * cUnit,RegLocation rlDest,RegLocation rlSrc1,RegLocation rlSrc2)51 static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest,
52 RegLocation rlSrc1, RegLocation rlSrc2)
53 {
54 RegLocation rlResult;
55 int resLo = dvmCompilerAllocTemp(cUnit);
56 int resHi = dvmCompilerAllocTemp(cUnit);
57 int tmp1 = dvmCompilerAllocTemp(cUnit);
58
59 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
60 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
61
62 newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
63 newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
64 newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
65 newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
66 dvmCompilerFreeTemp(cUnit, tmp1);
67
68 rlResult = dvmCompilerGetReturnWide(cUnit); // Just as a template, will patch
69 rlResult.lowReg = resLo;
70 rlResult.highReg = resHi;
71 storeValueWide(cUnit, rlDest, rlResult);
72 }
73
/*
 * Generate a 64-bit three-address operation as two 32-bit operations:
 * firstOp is applied to the low words and secondOp to the high words of
 * rlSrc1 and rlSrc2, and the wide result is stored to rlDest.
 *
 * The mir argument is not used by this implementation.
 */
static void genLong3Addr(CompilationUnit *cUnit, MIR *mir, OpKind firstOp,
                         OpKind secondOp, RegLocation rlDest,
                         RegLocation rlSrc1, RegLocation rlSrc2)
{
    RegLocation rlLeft = loadValueWide(cUnit, rlSrc1, kCoreReg);
    RegLocation rlRight = loadValueWide(cUnit, rlSrc2, kCoreReg);
    RegLocation rlOut = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);

    /* Low halves first, then high halves. */
    opRegRegReg(cUnit, firstOp, rlOut.lowReg, rlLeft.lowReg, rlRight.lowReg);
    opRegRegReg(cUnit, secondOp, rlOut.highReg, rlLeft.highReg,
                rlRight.highReg);

    storeValueWide(cUnit, rlDest, rlOut);
}
87
/*
 * Create the register pool for a compilation unit and attach it to
 * cUnit->regPool.
 *
 * The core and FP temp tables are sized from, and initialized with, the
 * coreTemps and fpTemps arrays (defined outside this section).  The
 * coreRegs/FPRegs tables are left empty (NULL, count 0).  A bit vector
 * with cUnit->numSSARegs entries is allocated for nullCheckedRegs.
 */
void dvmCompilerInitializeRegAlloc(CompilationUnit *cUnit)
{
    const int coreTempCount = sizeof(coreTemps)/sizeof(int);
    const int fpTempCount = sizeof(fpTemps)/sizeof(int);
    RegisterPool *regPool = dvmCompilerNew(sizeof(*regPool), true);

    cUnit->regPool = regPool;

    /* Temp register tables. */
    regPool->numCoreTemps = coreTempCount;
    regPool->coreTemps =
        dvmCompilerNew(coreTempCount * sizeof(*regPool->coreTemps), true);
    regPool->numFPTemps = fpTempCount;
    regPool->FPTemps =
        dvmCompilerNew(fpTempCount * sizeof(*regPool->FPTemps), true);

    /* The non-temp register tables are empty here. */
    regPool->numCoreRegs = 0;
    regPool->coreRegs = NULL;
    regPool->numFPRegs = 0;
    regPool->FPRegs = NULL;

    dvmCompilerInitPool(regPool->coreTemps, coreTemps, regPool->numCoreTemps);
    dvmCompilerInitPool(regPool->FPTemps, fpTemps, regPool->numFPTemps);
    dvmCompilerInitPool(regPool->coreRegs, NULL, 0);
    dvmCompilerInitPool(regPool->FPRegs, NULL, 0);

    regPool->nullCheckedRegs =
        dvmCompilerAllocBitVector(cUnit->numSSARegs, false);
}
111
112 /*
113 * Generate a Thumb2 IT instruction, which can nullify up to
114 * four subsequent instructions based on a condition and its
115 * inverse. The condition applies to the first instruction, which
116 * is executed if the condition is met. The string "guide" consists
117 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
118 * A "T" means the instruction is executed if the condition is
119 * met, and an "E" means the instruction is executed if the condition
120 * is not met.
121 */
genIT(CompilationUnit * cUnit,ArmConditionCode code,char * guide)122 static ArmLIR *genIT(CompilationUnit *cUnit, ArmConditionCode code,
123 char *guide)
124 {
125 int mask;
126 int condBit = code & 1;
127 int altBit = condBit ^ 1;
128 int mask3 = 0;
129 int mask2 = 0;
130 int mask1 = 0;
131
132 //Note: case fallthroughs intentional
133 switch(strlen(guide)) {
134 case 3:
135 mask1 = (guide[2] == 'T') ? condBit : altBit;
136 case 2:
137 mask2 = (guide[1] == 'T') ? condBit : altBit;
138 case 1:
139 mask3 = (guide[0] == 'T') ? condBit : altBit;
140 break;
141 case 0:
142 break;
143 default:
144 LOGE("Jit: bad case in genIT");
145 dvmCompilerAbort(cUnit);
146 }
147 mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
148 (1 << (3 - strlen(guide)));
149 return newLIR2(cUnit, kThumb2It, code, mask);
150 }
151
152 /* Export the Dalvik PC assicated with an instruction to the StackSave area */
genExportPC(CompilationUnit * cUnit,MIR * mir)153 static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir)
154 {
155 ArmLIR *res;
156 int offset = offsetof(StackSaveArea, xtra.currentPc);
157 int rDPC = dvmCompilerAllocTemp(cUnit);
158 res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
159 newLIR3(cUnit, kThumb2StrRRI8Predec, rDPC, rFP,
160 sizeof(StackSaveArea) - offset);
161 dvmCompilerFreeTemp(cUnit, rDPC);
162 return res;
163 }
164
165 /*
166 * Handle simple case (thin lock) inline. If it's complicated, bail
167 * out to the heavyweight lock/unlock routines. We'll use dedicated
168 * registers here in order to be in the right position in case we
169 * to bail to dvm[Lock/Unlock]Object(self, object)
170 *
171 * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object
172 * r1 -> object [arg1 for dvm[Lock/Unlock]Object
173 * r2 -> intial contents of object->lock, later result of strex
174 * r3 -> self->threadId
175 * r7 -> temp to hold new lock value [unlock only]
176 * r4 -> allow to be used by utilities as general temp
177 *
178 * The result of the strex is 0 if we acquire the lock.
179 *
180 * See comments in Sync.c for the layout of the lock word.
181 * Of particular interest to this code is the test for the
182 * simple case - which we handle inline. For monitor enter, the
183 * simple case is thin lock, held by no-one. For monitor exit,
184 * the simple case is thin lock, held by the unlocking thread with
185 * a recurse count of 0.
186 *
187 * A minor complication is that there is a field in the lock word
188 * unrelated to locking: the hash state. This field must be ignored, but
189 * preserved.
190 *
191 */
genMonitorEnter(CompilationUnit * cUnit,MIR * mir)192 static void genMonitorEnter(CompilationUnit *cUnit, MIR *mir)
193 {
194 RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
195 ArmLIR *target;
196 ArmLIR *hopTarget;
197 ArmLIR *branch;
198 ArmLIR *hopBranch;
199
200 assert(LW_SHAPE_THIN == 0);
201 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
202 dvmCompilerLockAllTemps(cUnit); // Prepare for explicit register usage
203 dvmCompilerFreeTemp(cUnit, r4PC); // Free up r4 for general use
204 loadWordDisp(cUnit, rGLUE, offsetof(InterpState, self), r0); // Get self
205 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
206 loadWordDisp(cUnit, r0, offsetof(Thread, threadId), r3); // Get threadId
207 newLIR3(cUnit, kThumb2Ldrex, r2, r1,
208 offsetof(Object, lock) >> 2); // Get object->lock
209 opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
210 // Is lock unheld on lock or held by us (==threadId) on unlock?
211 newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, LW_LOCK_OWNER_SHIFT - 1);
212 newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
213 LW_LOCK_OWNER_SHIFT - 1);
214 hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
215 newLIR4(cUnit, kThumb2Strex, r2, r3, r1, offsetof(Object, lock) >> 2);
216 dvmCompilerGenMemBarrier(cUnit);
217 branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);
218
219 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
220 hopTarget->defMask = ENCODE_ALL;
221 hopBranch->generic.target = (LIR *)hopTarget;
222
223 // Clear the lock
224 ArmLIR *inst = newLIR0(cUnit, kThumb2Clrex);
225 // ...and make it a scheduling barrier
226 inst->defMask = ENCODE_ALL;
227
228 // Export PC (part 1)
229 loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));
230
231 /* Get dPC of next insn */
232 loadConstant(cUnit, r4PC, (int)(cUnit->method->insns + mir->offset +
233 dexGetInstrWidthAbs(gDvm.instrWidth, OP_MONITOR_ENTER)));
234 // Export PC (part 2)
235 newLIR3(cUnit, kThumb2StrRRI8Predec, r3, rFP,
236 sizeof(StackSaveArea) -
237 offsetof(StackSaveArea, xtra.currentPc));
238 /* Call template, and don't return */
239 genDispatchToHandler(cUnit, TEMPLATE_MONITOR_ENTER);
240 // Resume here
241 target = newLIR0(cUnit, kArmPseudoTargetLabel);
242 target->defMask = ENCODE_ALL;
243 branch->generic.target = (LIR *)target;
244 }
245
246 /*
247 * For monitor unlock, we don't have to use ldrex/strex. Once
248 * we've determined that the lock is thin and that we own it with
249 * a zero recursion count, it's safe to punch it back to the
250 * initial, unlock thin state with a store word.
251 */
genMonitorExit(CompilationUnit * cUnit,MIR * mir)252 static void genMonitorExit(CompilationUnit *cUnit, MIR *mir)
253 {
254 RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
255 ArmLIR *target;
256 ArmLIR *branch;
257 ArmLIR *hopTarget;
258 ArmLIR *hopBranch;
259
260 assert(LW_SHAPE_THIN == 0);
261 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
262 dvmCompilerLockAllTemps(cUnit); // Prepare for explicit register usage
263 dvmCompilerFreeTemp(cUnit, r4PC); // Free up r4 for general use
264 loadWordDisp(cUnit, rGLUE, offsetof(InterpState, self), r0); // Get self
265 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
266 loadWordDisp(cUnit, r1, offsetof(Object, lock), r2); // Get object->lock
267 loadWordDisp(cUnit, r0, offsetof(Thread, threadId), r3); // Get threadId
268 // Is lock unheld on lock or held by us (==threadId) on unlock?
269 opRegRegImm(cUnit, kOpAnd, r7, r2,
270 (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
271 opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
272 newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
273 LW_LOCK_OWNER_SHIFT - 1);
274 opRegReg(cUnit, kOpSub, r2, r3);
275 hopBranch = opCondBranch(cUnit, kArmCondNe);
276 dvmCompilerGenMemBarrier(cUnit);
277 storeWordDisp(cUnit, r1, offsetof(Object, lock), r7);
278 branch = opNone(cUnit, kOpUncondBr);
279
280 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
281 hopTarget->defMask = ENCODE_ALL;
282 hopBranch->generic.target = (LIR *)hopTarget;
283
284 // Export PC (part 1)
285 loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));
286
287 LOAD_FUNC_ADDR(cUnit, r7, (int)dvmUnlockObject);
288 // Export PC (part 2)
289 newLIR3(cUnit, kThumb2StrRRI8Predec, r3, rFP,
290 sizeof(StackSaveArea) -
291 offsetof(StackSaveArea, xtra.currentPc));
292 opReg(cUnit, kOpBlx, r7);
293 opRegImm(cUnit, kOpCmp, r0, 0); /* Did we throw? */
294 ArmLIR *branchOver = opCondBranch(cUnit, kArmCondNe);
295 loadConstant(cUnit, r0,
296 (int) (cUnit->method->insns + mir->offset +
297 dexGetInstrWidthAbs(gDvm.instrWidth, OP_MONITOR_EXIT)));
298 genDispatchToHandler(cUnit, TEMPLATE_THROW_EXCEPTION_COMMON);
299
300 // Resume here
301 target = newLIR0(cUnit, kArmPseudoTargetLabel);
302 target->defMask = ENCODE_ALL;
303 branch->generic.target = (LIR *)target;
304 branchOver->generic.target = (LIR *) target;
305 }
306
/*
 * Dispatch a monitor instruction: OP_MONITOR_ENTER goes to
 * genMonitorEnter(), every other opcode goes to genMonitorExit().
 */
static void genMonitor(CompilationUnit *cUnit, MIR *mir)
{
    if (mir->dalvikInsn.opCode != OP_MONITOR_ENTER) {
        genMonitorExit(cUnit, mir);
        return;
    }
    genMonitorEnter(cUnit, mir);
}
314
315 /*
316 * 64-bit 3way compare function.
317 * mov r7, #-1
318 * cmp op1hi, op2hi
319 * blt done
320 * bgt flip
321 * sub r7, op1lo, op2lo (treat as unsigned)
322 * beq done
323 * ite hi
324 * mov(hi) r7, #-1
325 * mov(!hi) r7, #1
326 * flip:
327 * neg r7
328 * done:
329 */
genCmpLong(CompilationUnit * cUnit,MIR * mir,RegLocation rlDest,RegLocation rlSrc1,RegLocation rlSrc2)330 static void genCmpLong(CompilationUnit *cUnit, MIR *mir,
331 RegLocation rlDest, RegLocation rlSrc1,
332 RegLocation rlSrc2)
333 {
334 RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
335 ArmLIR *target1;
336 ArmLIR *target2;
337 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
338 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
339 rlTemp.lowReg = dvmCompilerAllocTemp(cUnit);
340 loadConstant(cUnit, rlTemp.lowReg, -1);
341 opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
342 ArmLIR *branch1 = opCondBranch(cUnit, kArmCondLt);
343 ArmLIR *branch2 = opCondBranch(cUnit, kArmCondGt);
344 opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
345 ArmLIR *branch3 = opCondBranch(cUnit, kArmCondEq);
346
347 genIT(cUnit, kArmCondHi, "E");
348 newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
349 loadConstant(cUnit, rlTemp.lowReg, 1);
350 genBarrier(cUnit);
351
352 target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
353 target2->defMask = -1;
354 opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);
355
356 target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
357 target1->defMask = -1;
358
359 storeValue(cUnit, rlDest, rlTemp);
360
361 branch1->generic.target = (LIR *)target1;
362 branch2->generic.target = (LIR *)target2;
363 branch3->generic.target = branch1->generic.target;
364 }
365
/*
 * Generate inline code for a float absolute value: source 0 of the MIR
 * is loaded into an FP register, kThumb2Vabss is emitted into the FP
 * register chosen for the inlined target, and the result is stored.
 *
 * Always returns true.
 */
static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlArg = dvmCompilerGetSrc(cUnit, mir, 0);
    RegLocation rlDest = inlinedTarget(cUnit, mir, true);
    RegLocation rlOperand = loadValue(cUnit, rlArg, kFPReg);
    RegLocation rlAbs = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);

    newLIR2(cUnit, kThumb2Vabss, rlAbs.lowReg, rlOperand.lowReg);
    storeValue(cUnit, rlDest, rlAbs);

    return true;
}
376
/*
 * Generate inline code for a double absolute value: the wide source
 * (operands 0 and 1 of the MIR) is loaded into FP registers,
 * kThumb2Vabsd is emitted on the register pairs (combined with S2D),
 * and the wide result is stored to the inlined target.
 *
 * Always returns true.
 */
static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
{
    RegLocation rlArg = dvmCompilerGetSrcWide(cUnit, mir, 0, 1);
    RegLocation rlDest = inlinedTargetWide(cUnit, mir, true);
    RegLocation rlOperand = loadValueWide(cUnit, rlArg, kFPReg);
    RegLocation rlAbs = dvmCompilerEvalLoc(cUnit, rlDest, kFPReg, true);
    int dstDouble = S2D(rlAbs.lowReg, rlAbs.highReg);
    int srcDouble = S2D(rlOperand.lowReg, rlOperand.highReg);

    newLIR2(cUnit, kThumb2Vabsd, dstDouble, srcDouble);
    storeValueWide(cUnit, rlDest, rlAbs);

    return true;
}
388
/*
 * Generate inline code for integer min (isMin true) or max (isMin false).
 *
 * The two operands are compared and an IT block with an "E" guide picks
 * between two moves: the second operand when the condition holds
 * (src1 > src2 for min, src1 < src2 for max), otherwise the first.
 *
 * Always returns false.  NOTE(review): the inlined abs helpers in this
 * file return true instead; the callers are not visible here, so check
 * what each return value means to them before changing either.
 */
static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
{
    RegLocation rlArg1 = dvmCompilerGetSrc(cUnit, mir, 0);
    RegLocation rlArg2 = dvmCompilerGetSrc(cUnit, mir, 1);
    RegLocation rlFirst = loadValue(cUnit, rlArg1, kCoreReg);
    RegLocation rlSecond = loadValue(cUnit, rlArg2, kCoreReg);
    RegLocation rlDest = inlinedTarget(cUnit, mir, false);
    RegLocation rlPicked = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
    ArmConditionCode pickSecondCond = (isMin) ? kArmCondGt : kArmCondLt;

    opRegReg(cUnit, kOpCmp, rlFirst.lowReg, rlSecond.lowReg);
    genIT(cUnit, pickSecondCond, "E");
    opRegReg(cUnit, kOpMov, rlPicked.lowReg, rlSecond.lowReg);  /* then */
    opRegReg(cUnit, kOpMov, rlPicked.lowReg, rlFirst.lowReg);   /* else */
    genBarrier(cUnit);

    storeValue(cUnit, rlDest, rlPicked);
    return false;
}
405
/*
 * Generate a multiply by a constant that has exactly two bits set, at
 * positions firstBit and secondBit:
 *
 *     result = src + (src << (secondBit - firstBit))
 *     result = result << firstBit          (only when firstBit != 0)
 *
 * The lit argument is not used by this implementation.
 */
static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit,
        RegLocation rlSrc, RegLocation rlResult, int lit,
        int firstBit, int secondBit)
{
    const int bitGap = secondBit - firstBit;

    opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg,
                     rlSrc.lowReg, encodeShift(kArmLsl, bitGap));

    if (firstBit == 0) {
        return;
    }
    opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
}
416