1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <sys/mman.h>
17 #include "Dalvik.h"
18 #include "libdex/DexOpcodes.h"
19 #include "compiler/Compiler.h"
20 #include "compiler/CompilerIR.h"
21 #include "interp/Jit.h"
22 #include "libdex/DexFile.h"
23 #include "Lower.h"
24 #include "NcgAot.h"
25 #include "compiler/codegen/CompilerCodegen.h"
26
27 /* Init values when a predicted chain is initially assembled */
28 /* E7FE is branch to self */
29 #define PREDICTED_CHAIN_BX_PAIR_INIT 0xe7fe
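/*
 * Note: 0xe7fe is the Thumb encoding of "branch to self", apparently carried
 * over from the ARM backend. On IA32 it serves purely as a marker value: a
 * predicted chaining cell whose branch field still holds this value is treated
 * as uninitialized (see the fast path in inlineCachePatchEnqueue), and
 * handleInvokePredictedChainingCell stores it as the first word of a freshly
 * emitted cell.
 */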
30
31 /* Target-specific save/restore */
32 extern "C" void dvmJitCalleeSave(double *saveArea);
33 extern "C" void dvmJitCalleeRestore(double *saveArea);
34
35 /*
36 * Determine the initial instruction set to be used for this trace.
37 * Later components may decide to change this.
38 */
39 //JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
40 JitInstructionSetType dvmCompilerInstructionSet(void)
41 {
42 return DALVIK_JIT_IA32;
43 }
44
45 JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
46 {
47 return DALVIK_JIT_IA32;
48 }
49
50 /* We don't use the interpret template for IA32 */
51 void *dvmCompilerGetInterpretTemplate()
52 {
53 return NULL;
54 }
55
56 /* Track the number of times that the code cache is patched */
57 #if defined(WITH_JIT_TUNING)
58 #define UPDATE_CODE_CACHE_PATCHES() (gDvmJit.codeCachePatches++)
59 #else
60 #define UPDATE_CODE_CACHE_PATCHES()
61 #endif
62
63 bool dvmCompilerArchInit() {
64 /* Target-specific configuration */
65 gDvmJit.jitTableSize = 1 << 12;
66 gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
67 if (gDvmJit.threshold == 0) {
68 gDvmJit.threshold = 255;
69 }
70 gDvmJit.codeCacheSize = 512*1024;
71 gDvmJit.optLevel = kJitOptLevelO1;
72
73 //Disable Method-JIT
74 gDvmJit.disableOpt |= (1 << kMethodJit);
75
76 #if defined(WITH_SELF_VERIFICATION)
77 /* Force into blocking mode */
78 gDvmJit.blockingMode = true;
79 gDvm.nativeDebuggerActive = true;
80 #endif
81
82 // Make sure all threads have current values
83 dvmJitUpdateThreadStateAll();
84
85 return true;
86 }
87
88 void dvmCompilerPatchInlineCache(void)
89 {
90 int i;
91 PredictedChainingCell *minAddr, *maxAddr;
92
93 /* Nothing to be done */
94 if (gDvmJit.compilerICPatchIndex == 0) return;
95
96 /*
97 * Since all threads are already stopped, we don't really need to acquire
98 * the lock. However, a race condition could easily be introduced in the
99 * future without anyone paying attention, so we still acquire the lock here.
100 */
101 dvmLockMutex(&gDvmJit.compilerICPatchLock);
102
103 UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
104
105 //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);
106
107 /* Initialize the min/max address range */
108 minAddr = (PredictedChainingCell *)
109 ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
110 maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;
111
112 for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
113 ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
114 PredictedChainingCell *cellAddr = workOrder->cellAddr;
115 PredictedChainingCell *cellContent = &workOrder->cellContent;
116 ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
117 workOrder->classLoader);
118
119 assert(clazz->serialNumber == workOrder->serialNumber);
120
121 /* Use the newly resolved clazz pointer */
122 cellContent->clazz = clazz;
123
124 if (cellAddr->clazz == NULL) {
125 COMPILER_TRACE_CHAINING(
126 ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
127 cellAddr,
128 cellContent->clazz->descriptor,
129 cellContent->method->name));
130 } else {
131 COMPILER_TRACE_CHAINING(
132 ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
133 "patched",
134 cellAddr,
135 cellAddr->clazz->descriptor,
136 cellContent->clazz->descriptor,
137 cellContent->method->name));
138 }
139
140 /* Patch the chaining cell */
141 *cellAddr = *cellContent;
142 minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
143 maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
144 }
145
146 PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
147
148 gDvmJit.compilerICPatchIndex = 0;
149 dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
150 }
151
152 /* Target-specific cache clearing */
153 void dvmCompilerCacheClear(char *start, size_t size)
154 {
155 /* "0xFF 0xFF" is an invalid opcode for x86. */
156 memset(start, 0xFF, size);
157 }
158
159 /* for JIT debugging, to be implemented */
160 void dvmJitCalleeSave(double *saveArea) {
161 }
162
163 void dvmJitCalleeRestore(double *saveArea) {
164 }
165
166 void dvmJitToInterpSingleStep() {
167 }
168
169 JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
170 const JitEntry *knownEntry) {
171 return NULL;
172 }
173
174 void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
175 {
176 }
177
178 void dvmCompilerArchDump(void)
179 {
180 }
181
182 char *getTraceBase(const JitEntry *p)
183 {
184 return NULL;
185 }
186
187 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
188 {
189 }
190
191 void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
192 {
193 }
194
195 void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
196 {
197 // Method-based JIT not supported for x86.
198 }
199
200 void dvmJitScanAllClassPointers(void (*callback)(void *))
201 {
202 }
203
204 /* Handy function to retrieve the profile count */
205 static inline int getProfileCount(const JitEntry *entry)
206 {
207 if (entry->dPC == 0 || entry->codeAddress == 0)
208 return 0;
209 u4 *pExecutionCount = (u4 *) getTraceBase(entry);
210
211 return pExecutionCount ? *pExecutionCount : 0;
212 }
213
214 /* qsort callback function */
215 static int sortTraceProfileCount(const void *entry1, const void *entry2)
216 {
217 const JitEntry *jitEntry1 = (const JitEntry *)entry1;
218 const JitEntry *jitEntry2 = (const JitEntry *)entry2;
219
220 JitTraceCounter_t count1 = getProfileCount(jitEntry1);
221 JitTraceCounter_t count2 = getProfileCount(jitEntry2);
222 return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
223 }
224
225 /* Sort the trace profile counts and dump them */
226 void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
227 {
228 JitEntry *sortedEntries;
229 int numTraces = 0;
230 unsigned long counts = 0;
231 unsigned int i;
232
233 /* Make sure that the table is not changing */
234 dvmLockMutex(&gDvmJit.tableLock);
235
236 /* Sort the entries by descending order */
237 sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
238 if (sortedEntries == NULL)
239 goto done;
240 memcpy(sortedEntries, gDvmJit.pJitEntryTable,
241 sizeof(JitEntry) * gDvmJit.jitTableSize);
242 qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
243 sortTraceProfileCount);
244
245 /* Count the traces and accumulate their execution counts */
246 for (i=0; i < gDvmJit.jitTableSize; i++) {
247 if (sortedEntries[i].dPC != 0) {
248 numTraces++; counts += getProfileCount(&sortedEntries[i]);
249 }
250 }
251 if (numTraces == 0)
252 numTraces = 1;
253 ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));
254
255 free(sortedEntries);
256 done:
257 dvmUnlockMutex(&gDvmJit.tableLock);
258 return;
259 }
260
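/*
 * Emit an unconditional jump at instAddr. relOffset is measured from the start
 * of the jump instruction (instAddr) to the target; because an x86 relative
 * jump encodes its displacement from the end of the instruction, the
 * instruction size is subtracted before emission. A hypothetical call (names
 * are illustrative only):
 *
 *   jumpWithRelOffset(patchSite, (int) (targetAddr - patchSite));
 */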
261 void jumpWithRelOffset(char* instAddr, int relOffset) {
262 stream = instAddr;
263 OpndSize immSize = estOpndSizeFromImm(relOffset);
264 relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
265 dump_imm(Mnemonic_JMP, immSize, relOffset);
266 }
267
268 // Works whether or not instructions for the target basic block have been generated
269 LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
270 stream = instAddr;
271 bool unknown;
272 OpndSize size;
273 int relativeNCG = targetId;
274 relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
275 unconditional_jump_int(relativeNCG, size);
276 return NULL;
277 }
278
279 LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
280 stream = instAddr;
281 bool unknown;
282 OpndSize size;
283 int relativeNCG = targetId;
284 relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
285 conditional_jump_int(cc, relativeNCG, size);
286 return NULL;
287 }
288
289 /*
290 * Attempt to enqueue a work order to patch an inline cache for a predicted
291 * chaining cell for virtual/interface calls.
292 */
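/*
 * A rough summary of the cases handled below:
 *  1) The cell is still in its initialized state: patch it in place (the fast
 *     path).
 *  2) The incoming clazz differs from the staged clazz: only stage the new
 *     clazz, to filter out infrequently missed classes.
 *  3) Same method but a different clazz: patch just the clazz field; no need
 *     to stop the world.
 *  4) Otherwise: queue a work order to be applied at the next safe point, or
 *     drop the request if the queue is full.
 */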
293 static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
294 PredictedChainingCell *newContent)
295 {
296 bool result = true;
297
298 /*
299 * Make sure only one thread gets here, since updating the cell (i.e., the
300 * fast path) and queueing the request (i.e., the queued path) have to be
301 * done in an atomic fashion.
302 */
303 dvmLockMutex(&gDvmJit.compilerICPatchLock);
304
305 /* Fast path for uninitialized chaining cell */
306 if (cellAddr->clazz == NULL &&
307 cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
308 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
309
310 cellAddr->method = newContent->method;
311 cellAddr->branch = newContent->branch;
312 cellAddr->branch2 = newContent->branch2;
313
314 /*
315 * The update order matters - make sure clazz is updated last since it
316 * will bring the uninitialized chaining cell to life.
317 */
318 android_atomic_release_store((int32_t)newContent->clazz,
319 (volatile int32_t *)(void*) &cellAddr->clazz);
320 //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
321 UPDATE_CODE_CACHE_PATCHES();
322
323 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
324
325 #if 0
326 MEM_BARRIER();
327 cellAddr->clazz = newContent->clazz;
328 //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
329 #endif
330 #if defined(WITH_JIT_TUNING)
331 gDvmJit.icPatchInit++;
332 #endif
333 COMPILER_TRACE_CHAINING(
334 ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
335 cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
336 /* Check if this is a frequently missed clazz */
337 } else if (cellAddr->stagedClazz != newContent->clazz) {
338 /* Not proven to be frequent yet - build up the filter cache */
339 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
340
341 cellAddr->stagedClazz = newContent->clazz;
342
343 UPDATE_CODE_CACHE_PATCHES();
344 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
345
346 #if defined(WITH_JIT_TUNING)
347 gDvmJit.icPatchRejected++;
348 #endif
349 /*
350 * Different classes but same method implementation - it is safe to just
351 * patch the class value without the need to stop the world.
352 */
353 } else if (cellAddr->method == newContent->method) {
354 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
355
356 cellAddr->clazz = newContent->clazz;
357 /* No need to flush the cache here since the branch is not patched */
358 UPDATE_CODE_CACHE_PATCHES();
359
360 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
361
362 #if defined(WITH_JIT_TUNING)
363 gDvmJit.icPatchLockFree++;
364 #endif
365 /*
366 * Cannot patch the chaining cell inline - queue it until the next safe
367 * point.
368 */
369 } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
370 int index = gDvmJit.compilerICPatchIndex++;
371 const ClassObject *clazz = newContent->clazz;
372
373 gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
374 gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
375 gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
376 gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
377 /* For verification purpose only */
378 gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;
379
380 #if defined(WITH_JIT_TUNING)
381 gDvmJit.icPatchQueued++;
382 #endif
383 COMPILER_TRACE_CHAINING(
384 ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
385 cellAddr, newContent->clazz->descriptor, newContent->method->name));
386 } else {
387 /* Queue is full - just drop this patch request */
388 #if defined(WITH_JIT_TUNING)
389 gDvmJit.icPatchDropped++;
390 #endif
391
392 COMPILER_TRACE_CHAINING(
393 ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
394 cellAddr, newContent->clazz->descriptor, newContent->method->name));
395 }
396
397 dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
398 return result;
399 }
400
401 /*
402 * This method is called from the invoke templates for virtual and interface
403 * methods to speculatively set up a chain to the callee. The templates are
404 * written in assembly and have set up method, cell, and clazz at r0, r2, and
405 * r3 respectively, so there is an unused argument in the list. Upon return,
406 * one of the following three outcomes may happen:
407 * 1) The chain is not set up because the callee is native. Reset the rechain
408 * count to a big number so that it will take a long time before the next
409 * rechain attempt happens.
410 * 2) The chain is not set up because the callee's translation has not been
411 * created yet. Reset the rechain count to a small number and retry soon.
412 * 3) Ask all other threads to stop before patching this chaining cell.
413 * This is required because another thread may have passed the class check
414 * but may not have reached the chaining cell yet to follow the chain. If we
415 * patch the content before halting the other thread, there is a small
416 * window in which it may follow the new but wrong chain and invoke a
417 * different method.
418 */
419 const Method *dvmJitToPatchPredictedChain(const Method *method,
420 Thread *self,
421 PredictedChainingCell *cell,
422 const ClassObject *clazz)
423 {
424 int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
425 /* Don't come back here for a long time if the method is native */
426 if (dvmIsNativeMethod(method)) {
427 UNPROTECT_CODE_CACHE(cell, sizeof(*cell));
428
429 /*
430 * Put a non-zero/bogus value in the clazz field so that it won't
431 * trigger immediate patching and will continue to fail to match with
432 * a real clazz pointer.
433 */
434 cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;
435
436 UPDATE_CODE_CACHE_PATCHES();
437 PROTECT_CODE_CACHE(cell, sizeof(*cell));
438 COMPILER_TRACE_CHAINING(
439 ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
440 cell, method->name));
441 goto done;
442 }
443 {
444 int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);
445
446 /*
447 * The callee has not been compiled yet. Reset the counter to a small
448 * value and come back to check again soon.
449 */
450 if ((tgtAddr == 0) ||
451 ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
452 COMPILER_TRACE_CHAINING(
453 ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
454 cell, method->clazz->descriptor, method->name));
455 goto done;
456 }
457
458 PredictedChainingCell newCell;
459
460 if (cell->clazz == NULL) {
461 newRechainCount = self->icRechainCount;
462 }
463
464 int relOffset = (int) tgtAddr - (int)cell;
465 OpndSize immSize = estOpndSizeFromImm(relOffset);
466 int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
467 relOffset -= jumpSize;
468 COMPILER_TRACE_CHAINING(
469 ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
470 cell, method->clazz->descriptor, method->name, jumpSize));
471 //can't use stream here since it is used by the compilation thread
472 dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch
473
474 newCell.clazz = clazz;
475 newCell.method = method;
476
477 /*
478 * Add the work order to the queue; the chaining cell will be patched the
479 * next time a safe point is entered.
480 *
481 * If the enqueuing fails, reset the rechain count to a normal value so that
482 * chaining won't get delayed indefinitely.
483 */
484 inlineCachePatchEnqueue(cell, &newCell);
485 }
486 done:
487 self->icRechainCount = newRechainCount;
488 return method;
489 }
490
491 /*
492 * Unchain a trace given the starting address of the translation
493 * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR.
494 * For ARM, it returns the address following the last cell unchained.
495 * For IA, it returns NULL since cacheflush is not required for IA.
496 */
497 u4* dvmJitUnchain(void* codeAddr)
498 {
499 /* codeAddr is 4-byte aligned, so is chain cell count offset */
500 u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
501 u2 chainCellCountOffset = *pChainCellCountOffset;
502 /* chain cell counts information is 4-byte aligned */
503 ChainCellCounts *pChainCellCounts =
504 (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
505 u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
506 u2 chainCellOffset = *pChainCellOffset;
507 u1* pChainCells;
508 int i,j;
509 PredictedChainingCell *predChainCell;
510 int padding;
511
512 /* Locate the beginning of the chain cell region */
513 pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);
514
515 /* The cells are sorted in order - walk through them and reset */
516 for (i = 0; i < kChainingCellGap; i++) {
517 /* for hot, normal, singleton chaining:
518 nop //padding.
519 jmp 0
520 mov imm32, reg1
521 mov imm32, reg2
522 call reg2
523 after chaining:
524 nop
525 jmp imm
526 mov imm32, reg1
527 mov imm32, reg2
528 call reg2
529 after unchaining:
530 nop
531 jmp 0
532 mov imm32, reg1
533 mov imm32, reg2
534 call reg2
535 Space occupied by the chaining cell in bytes: the nops are for padding so
536 that the 4-byte displacement of "jmp 0" stays 4-byte aligned.
537 Space for a predicted chaining cell: 5 words = 20 bytes
538 */
539 int elemSize = 0;
540 if (i == kChainingCellInvokePredicted) {
541 elemSize = 20;
542 }
543 COMPILER_TRACE_CHAINING(
544 ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));
545
546 for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
547 switch(i) {
548 case kChainingCellNormal:
549 case kChainingCellHot:
550 case kChainingCellInvokeSingleton:
551 case kChainingCellBackwardBranch:
552 COMPILER_TRACE_CHAINING(
553 ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
554 pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
555 elemSize = 4+5+5+2;
556 memset(pChainCells, 0, 4);
557 break;
558 case kChainingCellInvokePredicted:
559 COMPILER_TRACE_CHAINING(
560 ALOGI("Jit Runtime: unchaining of predicted"));
561 /* 4-byte aligned */
562 padding = (4 - ((u4)pChainCells & 3)) & 3;
563 pChainCells += padding;
564 predChainCell = (PredictedChainingCell *) pChainCells;
565 /*
566 * Another mutator thread could be racing to use this
567 * particular predicted cell and may have already passed
568 * the clazz comparison. So we cannot safely wipe the
569 * method and branch fields, but it is safe to clear the
570 * clazz, which serves as the key.
571 */
572 predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
573 break;
574 default:
575 ALOGE("Unexpected chaining type: %d", i);
576 dvmAbort(); // dvmAbort OK here - can't safely recover
577 }
578 COMPILER_TRACE_CHAINING(
579 ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
580 pChainCells += elemSize; /* Advance by a fixed number of bytes */
581 }
582 }
583 return NULL;
584 }
585
586 /* Unchain all translations in the cache. */
587 void dvmJitUnchainAll()
588 {
589 ALOGV("Jit Runtime: unchaining all");
590 if (gDvmJit.pJitEntryTable != NULL) {
591 COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
592 dvmLockMutex(&gDvmJit.tableLock);
593
594 UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
595
596 for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
597 if (gDvmJit.pJitEntryTable[i].dPC &&
598 !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
599 gDvmJit.pJitEntryTable[i].codeAddress) {
600 dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
601 }
602 }
603
604 PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
605
606 dvmUnlockMutex(&gDvmJit.tableLock);
607 gDvmJit.translationChains = 0;
608 }
609 gDvmJit.hasNewChain = false;
610 }
611
612 #define P_GPR_1 PhysicalReg_EBX
613 /* Add an additional jump instruction, keeping the jump target 4-byte aligned. */
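/*
 * A sketch of what insertJumpHelp emits when stream happens to be 4-byte
 * aligned (rem == 0), assuming dump_nop produces plain one-byte NOPs:
 *
 *   90 90 90          ; 3 x NOP padding
 *   E9 00 00 00 00    ; jmp rel32 with a 4-byte-aligned, zero displacement
 *
 * Keeping the 32-bit displacement aligned is presumably what allows
 * dvmJitChain and dvmJitUnchain to rewrite the "jump 0" later while other
 * threads may be executing (the "multithreading issue" mentioned in the
 * chaining cell handlers below).
 */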
614 static void insertJumpHelp()
615 {
616 int rem = (uint)stream % 4;
617 int nop_size = 3 - rem;
618 dump_nop(nop_size);
619 unconditional_jump_int(0, OpndSize_32);
620 return;
621 }
622
623 /* Chaining cell for code that may need warmup. */
624 /* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
625 blx r0
626 data 0xb23a //bytecode address: 0x5115b23a
627 data 0x5115
628 IA32 assembly:
629 jmp 0 //5 bytes
630 movl address, %ebx
631 movl dvmJitToInterpNormal, %eax
632 call %eax
633 <-- return address
634 */
635 static void handleNormalChainingCell(CompilationUnit *cUnit,
636 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
637 {
638 ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
639 cUnit->method->name, blockId, offset, stream - streamMethodStart);
640 if(dump_x86_inst)
641 ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
642 offset, stream - streamMethodStart, stream);
643 /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
644 * resolve the multithreading issue.
645 */
646 insertJumpHelp();
647 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
648 scratchRegs[0] = PhysicalReg_EAX;
649 call_dvmJitToInterpNormal();
650 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
651 }
652
653 /*
654 * Chaining cell for instructions that immediately follow already translated
655 * code.
656 */
657 static void handleHotChainingCell(CompilationUnit *cUnit,
658 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
659 {
660 ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
661 cUnit->method->name, blockId, offset, stream - streamMethodStart);
662 if(dump_x86_inst)
663 ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
664 offset, stream - streamMethodStart, stream);
665 /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
666 * resolve the multithreading issue.
667 */
668 insertJumpHelp();
669 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
670 scratchRegs[0] = PhysicalReg_EAX;
671 call_dvmJitToInterpTraceSelect();
672 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
673 }
674
675 /* Chaining cell for branches that branch back into the same basic block */
676 static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
677 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
678 {
679 ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
680 cUnit->method->name, blockId, offset, stream - streamMethodStart);
681 if(dump_x86_inst)
682 ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
683 offset, stream - streamMethodStart, stream);
684 /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
685 * resolve the multithreading issue.
686 */
687 insertJumpHelp();
688 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
689 scratchRegs[0] = PhysicalReg_EAX;
690 call_dvmJitToInterpNormal();
691 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
692 }
693
694 /* Chaining cell for monomorphic method invocations. */
695 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
696 const Method *callee, int blockId, LowOpBlockLabel* labelList)
697 {
698 ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
699 cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
700 if(dump_x86_inst)
701 ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
702 blockId, stream - streamMethodStart, stream);
703 /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
704 * resolve the multithreading issue.
705 */
706 insertJumpHelp();
707 move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
708 scratchRegs[0] = PhysicalReg_EAX;
709 call_dvmJitToInterpTraceSelect();
710 //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
711 }
712 #undef P_GPR_1
713
714 /* Chaining cell for polymorphic (virtual/interface) method invocations. */
715 static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
716 {
717 if(dump_x86_inst)
718 ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
719 blockId, stream - streamMethodStart, stream);
720 #ifndef PREDICTED_CHAINING
721 //assume rPC for callee->insns in %ebx
722 scratchRegs[0] = PhysicalReg_EAX;
723 #if defined(WITH_JIT_TUNING)
724 /* Predicted chaining is not enabled. Fall back to interpreter and
725 * indicate that predicted chaining was not done.
726 */
727 move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
728 #endif
729 call_dvmJitToInterpTraceSelectNoChain();
730 #else
731 /* make sure the section for the predicted chaining cell is 4-byte aligned */
732 //int padding = (4 - ((u4)stream & 3)) & 3;
733 //stream += padding;
734 int* streamData = (int*)stream;
735 /* Should not be executed in the initial state */
736 streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
737 streamData[1] = 0;
738 /* To be filled: class */
739 streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
740 /* To be filled: method */
741 streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
742 /*
743 * Rechain count. The initial value of 0 here will trigger chaining upon
744 * the first invocation of this callsite.
745 */
746 streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
747 #if 0
748 ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
749 *((int*)(stream+8)), *((int*)(stream+12)));
750 #endif
751 stream += 20; //5 *4
752 #endif
753 }
754
755 /* Load the Dalvik PC into r0 and jump to the specified target */
756 static void handlePCReconstruction(CompilationUnit *cUnit,
757 LowOpBlockLabel *targetLabel)
758 {
759 #if 0
760 LowOp **pcrLabel =
761 (LowOp **) cUnit->pcReconstructionList.elemList;
762 int numElems = cUnit->pcReconstructionList.numUsed;
763 int i;
764 for (i = 0; i < numElems; i++) {
765 dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
766 /* r0 = dalvik PC */
767 loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
768 genUnconditionalBranch(cUnit, targetLabel);
769 }
770 #endif
771 }
772
773 //use O0 code generator for hoisted checks outside of the loop
774 /*
775 * vA = arrayReg;
776 * vB = idxReg;
777 * vC = endConditionReg;
778 * arg[0] = maxC
779 * arg[1] = minC
780 * arg[2] = loopBranchConditionCode
781 */
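/*
 * Informal example (illustrative only): for a loop shaped like
 *
 *   for (i = start; i < end; i++) { ... a[i + maxC] ... }
 *
 * the hoisted check below verifies once, before entering the loop, that
 * a != null and that the largest index ever used (end - 1 + maxC here, or
 * end + maxC when the loop end condition is ">") is still below a.length,
 * so the per-iteration null and bounds checks can be elided from the loop
 * body.
 */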
782 #define P_GPR_1 PhysicalReg_EBX
783 #define P_GPR_2 PhysicalReg_ECX
784 static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
785 {
786 /*
787 * NOTE: these synthesized blocks don't have ssa names assigned
788 * for Dalvik registers. However, because they dominate the following
789 * blocks we can simply use the Dalvik name w/ subscript 0 as the
790 * ssa name.
791 */
792 DecodedInstruction *dInsn = &mir->dalvikInsn;
793 const int maxC = dInsn->arg[0];
794
795 /* assign array in virtual register to P_GPR_1 */
796 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
797 /* assign the end condition in virtual register vC to P_GPR_2 */
798 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
799 export_pc();
800 compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
801 condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
802 int delta = maxC;
803 /*
804 * If the loop end condition is ">=" instead of ">", then the largest value
805 * of the index is "endCondition - 1".
806 */
807 if (dInsn->arg[2] == OP_IF_GE) {
808 delta--;
809 }
810
811 if (delta < 0) { //+delta
812 //if P_GPR_2 is mapped to a VR, we can't do this
813 alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
814 } else if(delta > 0) {
815 alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
816 }
817 compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
818 condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
819 }
820
821 /*
822 * vA = arrayReg;
823 * vB = idxReg;
824 * vC = endConditionReg;
825 * arg[0] = maxC
826 * arg[1] = minC
827 * arg[2] = loopBranchConditionCode
828 */
829 static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
830 {
831 DecodedInstruction *dInsn = &mir->dalvikInsn;
832 const int maxC = dInsn->arg[0];
833
834 /* assign array in virtual register to P_GPR_1 */
835 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
836 /* assign index in virtual register to P_GPR_2 */
837 get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
838 export_pc();
839 compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
840 condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
841
842 if (maxC < 0) {
843 //if P_GPR_2 is mapped to a VR, we can't do this
844 alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
845 } else if(maxC > 0) {
846 alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
847 }
848 compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
849 condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
850
851 }
852 #undef P_GPR_1
853 #undef P_GPR_2
854
855 /*
856 * vA = idxReg;
857 * vB = minC;
858 */
859 #define P_GPR_1 PhysicalReg_ECX
860 static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
861 {
862 DecodedInstruction *dInsn = &mir->dalvikInsn;
863 const int minC = dInsn->vB;
864 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
865 export_pc();
866 compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
867 condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
868 }
869 #undef P_GPR_1
870
871 #ifdef WITH_JIT_INLINING
872 static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
873 {
874 CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
875 if(gDvm.executionMode == kExecutionModeNcgO0) {
876 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
877 move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
878 compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
879 export_pc(); //use %edx
880 conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
881 move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
882 compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
883 } else {
884 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
885 move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
886 nullCheck(5, false, 1, mir->dalvikInsn.vC);
887 move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
888 compare_reg_reg(4, false, 6, false);
889 }
890
891 //immediate will be updated later in genLandingPadForMispredictedCallee
892 streamMisPred = stream;
893 callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
894 }
895 #endif
896
897 /* Extended MIR instructions like PHI */
898 void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
899 {
900 ExecutionMode origMode = gDvm.executionMode;
901 gDvm.executionMode = kExecutionModeNcgO0;
902 switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
903 case kMirOpPhi: {
904 break;
905 }
906 case kMirOpNullNRangeUpCheck: {
907 genHoistedChecksForCountUpLoop(cUnit, mir);
908 break;
909 }
910 case kMirOpNullNRangeDownCheck: {
911 genHoistedChecksForCountDownLoop(cUnit, mir);
912 break;
913 }
914 case kMirOpLowerBound: {
915 genHoistedLowerBoundCheck(cUnit, mir);
916 break;
917 }
918 case kMirOpPunt: {
919 break;
920 }
921 #ifdef WITH_JIT_INLINING
922 case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
923 genValidationForPredictedInline(cUnit, mir);
924 break;
925 }
926 #endif
927 default:
928 break;
929 }
930 gDvm.executionMode = origMode;
931 }
932
933 static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
934 int bodyId)
935 {
936 /*
937 * Next, create two branches - one branch over to the loop body and the
938 * other branch to the PCR cell to punt.
939 */
940 //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
941 //setupResourceMasks(branchToBody);
942 //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);
943
944 #if 0
945 LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
946 branchToPCR->opCode = kThumbBUncond;
947 branchToPCR->generic.target = (LIR *) pcrLabel;
948 setupResourceMasks(branchToPCR);
949 cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
950 #endif
951 }
952
953 /* Check whether a block that ends with an unconditional goto can be merged with its taken target block */
954 bool mergeBlock(BasicBlock *bb) {
955 if(bb->blockType == kDalvikByteCode &&
956 bb->firstMIRInsn != NULL &&
957 (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
958 bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
959 bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
960 bb->fallThrough == NULL) {// &&
961 //cUnit->hasLoop) {
962 //ALOGI("merge blocks ending with goto at index %d", i);
963 MIR* prevInsn = bb->lastMIRInsn->prev;
964 if(bb->taken == NULL) return false;
965 MIR* mergeInsn = bb->taken->firstMIRInsn;
966 if(mergeInsn == NULL) return false;
967 if(prevInsn == NULL) {//the block has a single instruction
968 bb->firstMIRInsn = mergeInsn;
969 } else {
970 prevInsn->next = mergeInsn; //remove goto from the chain
971 }
972 mergeInsn->prev = prevInsn;
973 bb->lastMIRInsn = bb->taken->lastMIRInsn;
974 bb->taken->firstMIRInsn = NULL; //block being merged in
975 bb->fallThrough = bb->taken->fallThrough;
976 bb->taken = bb->taken->taken;
977 return true;
978 }
979 return false;
980 }
981
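/*
 * Account for the trace profiling entry point. On IA32 this only records
 * sizes: it sets cUnit->headerSize and returns the byte count that the caller
 * stores in cUnit->profileCodeSize; no profiling code is emitted here.
 */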
982 static int genTraceProfileEntry(CompilationUnit *cUnit)
983 {
984 cUnit->headerSize = 6;
985 if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
986 (gDvmJit.profileMode == kTraceProfilingDisabled)) {
987 return 12;
988 } else {
989 return 4;
990 }
991
992 }
993
994 #define PRINT_BUFFER_LEN 1024
995 /* Print the code block in the code cache in the range [startAddr, endAddr)
996 * in readable format.
997 */
998 void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
999 {
1000 char strbuf[PRINT_BUFFER_LEN];
1001 unsigned char *addr;
1002 unsigned char *next_addr;
1003 int n;
1004
1005 if (gDvmJit.printBinary) {
1006 // print binary in bytes
1007 n = 0;
1008 for (addr = startAddr; addr < endAddr; addr++) {
1009 n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
1010 if (n > PRINT_BUFFER_LEN - 10) {
1011 ALOGD("## %s", strbuf);
1012 n = 0;
1013 }
1014 }
1015 if (n > 0)
1016 ALOGD("## %s", strbuf);
1017 }
1018
1019 // print disassembled instructions
1020 addr = startAddr;
1021 while (addr < endAddr) {
1022 next_addr = reinterpret_cast<unsigned char*>
1023 (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
1024 strbuf, PRINT_BUFFER_LEN));
1025 if (addr != next_addr) {
1026 ALOGD("** %p: %s", addr, strbuf);
1027 } else { // check whether this is nop padding
1028 if (addr[0] == 0x90) {
1029 ALOGD("** %p: NOP (1 byte)", addr);
1030 next_addr += 1;
1031 } else if (addr[0] == 0x66 && addr[1] == 0x90) {
1032 ALOGD("** %p: NOP (2 bytes)", addr);
1033 next_addr += 2;
1034 } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
1035 ALOGD("** %p: NOP (3 bytes)", addr);
1036 next_addr += 3;
1037 } else {
1038 ALOGD("** unable to decode binary at %p", addr);
1039 break;
1040 }
1041 }
1042 addr = next_addr;
1043 }
1044 }
1045
1046 /* 4 is the number of additional bytes needed for the trace's chaining information:
1047 * 2 bytes for the chaining cell count offset and 2 bytes for the chaining cell offset */
1048 #define EXTRA_BYTES_FOR_CHAINING 4
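/*
 * Rough layout of a trace in the code cache as assembled by dvmCompilerMIR2LIR
 * below (codeAddr == streamMethodStart == cUnit->baseAddr):
 *
 *   codeAddr - 4 : u2 offset from codeAddr to the ChainCellCounts block
 *   codeAddr - 2 : u2 offset from codeAddr to the first chaining cell
 *   codeAddr     : trace code (16-byte aligned)
 *                  chaining cells
 *                  padding to a 4-byte boundary
 *                  ChainCellCounts
 *
 * dvmJitUnchain() reads the two u2 values at codeAddr - 4 and codeAddr - 2 to
 * locate the chaining cells again.
 */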
1049
1050 /* Entry function to invoke the backend of the JIT compiler */
1051 void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
1052 {
1053 dump_x86_inst = cUnit->printMe;
1054 /* Used to hold the labels of each block */
1055 LowOpBlockLabel *labelList =
1056 (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
1057 LowOp *headLIR = NULL;
1058 GrowableList chainingListByType[kChainingCellLast];
1059 unsigned int i, padding;
1060
1061 /*
1062 * Initialize the chaining cell lists, one per chaining cell type.
1063 */
1064 for (i = 0; i < kChainingCellLast; i++) {
1065 dvmInitGrowableList(&chainingListByType[i], 2);
1066 }
1067
1068 /* Clear the visited flag for each block */
1069 dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
1070 kAllNodes, false /* isIterative */);
1071
1072 GrowableListIterator iterator;
1073 dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
1074
1075 /* Traces start with a profiling entry point. Generate it here */
1076 cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
1077
1078 //BasicBlock **blockList = cUnit->blockList;
1079 GrowableList *blockList = &cUnit->blockList;
1080 BasicBlock *bb;
1081
1082 info->codeAddress = NULL;
1083 stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
1084
1085 // TODO: compile into a temporary buffer and then copy into the code cache.
1086 // That would let us leave the code cache unprotected for a shorter time.
1087 size_t unprotected_code_cache_bytes =
1088 gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
1089 UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1090
1091 streamStart = stream; /* trace start before alignment */
1092 stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
1093 stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
1094 streamMethodStart = stream; /* code start */
1095 for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
1096 labelList[i].lop.generic.offset = -1;
1097 }
1098 cUnit->exceptionBlockId = -1;
1099 for (i = 0; i < blockList->numUsed; i++) {
1100 bb = (BasicBlock *) blockList->elemList[i];
1101 if(bb->blockType == kExceptionHandling)
1102 cUnit->exceptionBlockId = i;
1103 }
1104 startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
1105 if(gDvm.executionMode == kExecutionModeNcgO1) {
1106 //merge blocks ending with "goto" with the fall through block
1107 if (cUnit->jitMode != kJitLoop)
1108 for (i = 0; i < blockList->numUsed; i++) {
1109 bb = (BasicBlock *) blockList->elemList[i];
1110 bool merged = mergeBlock(bb);
1111 while(merged) merged = mergeBlock(bb);
1112 }
1113 for (i = 0; i < blockList->numUsed; i++) {
1114 bb = (BasicBlock *) blockList->elemList[i];
1115 if(bb->blockType == kDalvikByteCode &&
1116 bb->firstMIRInsn != NULL) {
1117 preprocessingBB(bb);
1118 }
1119 }
1120 preprocessingTrace();
1121 }
1122
1123 /* Handle the content in each basic block */
1124 for (i = 0; ; i++) {
1125 MIR *mir;
1126 bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
1127 if (bb == NULL) break;
1128 if (bb->visited == true) continue;
1129
1130 labelList[i].immOpnd.value = bb->startOffset;
1131
1132 if (bb->blockType >= kChainingCellLast) {
1133 /*
1134 * Append the label pseudo LIR first. Chaining cells will be handled
1135 * separately afterwards.
1136 */
1137 dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
1138 }
1139
1140 if (bb->blockType == kEntryBlock) {
1141 labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
1142 if (bb->firstMIRInsn == NULL) {
1143 continue;
1144 } else {
1145 setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
1146 //&labelList[blockList[i]->fallThrough->id]);
1147 }
1148 } else if (bb->blockType == kExitBlock) {
1149 labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
1150 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1151 goto gen_fallthrough;
1152 } else if (bb->blockType == kDalvikByteCode) {
1153 if (bb->hidden == true) continue;
1154 labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
1155 /* Reset the register state */
1156 #if 0
1157 resetRegisterScoreboard(cUnit);
1158 #endif
1159 } else {
1160 switch (bb->blockType) {
1161 case kChainingCellNormal:
1162 labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
1163 /* handle the codegen later */
1164 dvmInsertGrowableList(
1165 &chainingListByType[kChainingCellNormal], i);
1166 break;
1167 case kChainingCellInvokeSingleton:
1168 labelList[i].lop.opCode2 =
1169 ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
1170 labelList[i].immOpnd.value =
1171 (int) bb->containingMethod;
1172 /* handle the codegen later */
1173 dvmInsertGrowableList(
1174 &chainingListByType[kChainingCellInvokeSingleton], i);
1175 break;
1176 case kChainingCellInvokePredicted:
1177 labelList[i].lop.opCode2 =
1178 ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
1179 /*
1180 * Move the cached method pointer from operand 1 to 0.
1181 * Operand 0 was clobbered earlier in this routine to store
1182 * the block starting offset, which is not applicable to
1183 * predicted chaining cell.
1184 */
1185 //TODO
1186 //labelList[i].operands[0] = labelList[i].operands[1];
1187
1188 /* handle the codegen later */
1189 dvmInsertGrowableList(
1190 &chainingListByType[kChainingCellInvokePredicted], i);
1191 break;
1192 case kChainingCellHot:
1193 labelList[i].lop.opCode2 =
1194 ATOM_PSEUDO_CHAINING_CELL_HOT;
1195 /* handle the codegen later */
1196 dvmInsertGrowableList(
1197 &chainingListByType[kChainingCellHot], i);
1198 break;
1199 case kPCReconstruction:
1200 /* Make sure exception handling block is next */
1201 labelList[i].lop.opCode2 =
1202 ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
1203 //assert (i == cUnit->numBlocks - 2);
1204 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1205 handlePCReconstruction(cUnit,
1206 &labelList[cUnit->puntBlock->id]);
1207 break;
1208 case kExceptionHandling:
1209 labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
1210 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1211 //if (cUnit->pcReconstructionList.numUsed) {
1212 scratchRegs[0] = PhysicalReg_EAX;
1213 jumpToInterpPunt();
1214 //call_dvmJitToInterpPunt();
1215 //}
1216 break;
1217 case kChainingCellBackwardBranch:
1218 labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
1219 /* handle the codegen later */
1220 dvmInsertGrowableList(
1221 &chainingListByType[kChainingCellBackwardBranch],
1222 i);
1223 break;
1224 default:
1225 break;
1226 }
1227 continue;
1228 }
1229 {
1230 //LowOp *headLIR = NULL;
1231 const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
1232 const u2 *startCodePtr = dexCode->insns;
1233 const u2 *codePtr;
1234 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1235 ALOGV("get ready to handle JIT bb %d type %d hidden %d",
1236 bb->id, bb->blockType, bb->hidden);
1237 for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
1238 bb = nextBB;
1239 bb->visited = true;
1240 cUnit->nextCodegenBlock = NULL;
1241
1242 if(gDvm.executionMode == kExecutionModeNcgO1 &&
1243 bb->blockType != kEntryBlock &&
1244 bb->firstMIRInsn != NULL) {
1245 startOfBasicBlock(bb);
1246 int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
1247 endOfBasicBlock(bb);
1248 if(cg_ret < 0) {
1249 endOfTrace(true/*freeOnly*/);
1250 cUnit->baseAddr = NULL;
1251 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1252 return;
1253 }
1254 } else {
1255 for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
1256 startOfBasicBlock(bb); //why here for O0
1257 Opcode dalvikOpCode = mir->dalvikInsn.opcode;
1258 if((int)dalvikOpCode >= (int)kMirOpFirst) {
1259 handleExtendedMIR(cUnit, mir);
1260 continue;
1261 }
1262 InstructionFormat dalvikFormat =
1263 dexGetFormatFromOpcode(dalvikOpCode);
1264 ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
1265 mir->offset, dalvikOpCode, dalvikFormat);
1266 LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
1267 /* Remember the first LIR for this block */
1268 if (headLIR == NULL) {
1269 headLIR = (LowOp*)boundaryLIR;
1270 }
1271 bool notHandled = true;
1272 /*
1273 * Debugging: screen the opcode first to see if it is in the
1274 * do[-not]-compile list
1275 */
1276 bool singleStepMe =
1277 gDvmJit.includeSelectedOp !=
1278 ((gDvmJit.opList[dalvikOpCode >> 3] &
1279 (1 << (dalvikOpCode & 0x7))) !=
1280 0);
1281 if (singleStepMe || cUnit->allSingleStep) {
1282 } else {
1283 codePtr = startCodePtr + mir->offset;
1284 //lower each byte code, update LIR
1285 notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
1286 if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
1287 CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1288 ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
1289 gDvmJit.codeCacheFull = true;
1290 cUnit->baseAddr = NULL;
1291 endOfTrace(true/*freeOnly*/);
1292 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1293 return;
1294 }
1295 }
1296 if (notHandled) {
1297 ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
1298 mir->offset,
1299 dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
1300 dalvikFormat);
1301 dvmAbort();
1302 break;
1303 }
1304 } // end for
1305 } // end else //JIT + O0 code generator
1306 }
1307 } // end for
1308 /* Eliminate redundant loads/stores and delay stores into later slots */
1309 #if 0
1310 dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
1311 cUnit->lastLIRInsn);
1312 #endif
1313 if (headLIR) headLIR = NULL;
1314 gen_fallthrough:
1315 /*
1316 * Check if the block is terminated due to trace length constraint -
1317 * insert an unconditional branch to the chaining cell.
1318 */
1319 if (bb->needFallThroughBranch) {
1320 jumpToBasicBlock(stream, bb->fallThrough->id);
1321 }
1322
1323 }
1324
1325 char* streamChainingStart = (char*)stream;
1326 /* Handle the chaining cells in predefined order */
1327 for (i = 0; i < kChainingCellGap; i++) {
1328 size_t j;
1329 int *blockIdList = (int *) chainingListByType[i].elemList;
1330
1331 cUnit->numChainingCells[i] = chainingListByType[i].numUsed;
1332
1333 /* No chaining cells of this type */
1334 if (cUnit->numChainingCells[i] == 0)
1335 continue;
1336
1337 /* Record the first LIR for a new type of chaining cell */
1338 cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
1339 for (j = 0; j < chainingListByType[i].numUsed; j++) {
1340 int blockId = blockIdList[j];
1341 BasicBlock *chainingBlock =
1342 (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
1343 blockId);
1344
1345 labelList[blockId].lop.generic.offset = (stream - streamMethodStart);
1346
1347 /* Align this chaining cell first */
1348 #if 0
1349 newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
1350 #endif
1351 /* Insert the pseudo chaining instruction */
1352 dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
1353
1354
1355 switch (chainingBlock->blockType) {
1356 case kChainingCellNormal:
1357 handleNormalChainingCell(cUnit,
1358 chainingBlock->startOffset, blockId, labelList);
1359 break;
1360 case kChainingCellInvokeSingleton:
1361 handleInvokeSingletonChainingCell(cUnit,
1362 chainingBlock->containingMethod, blockId, labelList);
1363 break;
1364 case kChainingCellInvokePredicted:
1365 handleInvokePredictedChainingCell(cUnit, blockId);
1366 break;
1367 case kChainingCellHot:
1368 handleHotChainingCell(cUnit,
1369 chainingBlock->startOffset, blockId, labelList);
1370 break;
1371 case kChainingCellBackwardBranch:
1372 handleBackwardBranchChainingCell(cUnit,
1373 chainingBlock->startOffset, blockId, labelList);
1374 break;
1375 default:
1376 ALOGE("Bad blocktype %d", chainingBlock->blockType);
1377 dvmAbort();
1378 break;
1379 }
1380
1381 if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1382 ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
1383 gDvmJit.codeCacheFull = true;
1384 cUnit->baseAddr = NULL;
1385 endOfTrace(true); /* need to free structures */
1386 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1387 return;
1388 }
1389 }
1390 }
1391 #if 0
1392 dvmCompilerApplyGlobalOptimizations(cUnit);
1393 #endif
1394 endOfTrace(false);
1395
1396 if (gDvmJit.codeCacheFull) {
1397 /* We hit the code cache size limit inside endOfTrace(false).
1398 * Bail out for this trace!
1399 */
1400 ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
1401 cUnit->baseAddr = NULL;
1402 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1403 return;
1404 }
1405
1406 /* dump section for chaining cell counts, make sure it is 4-byte aligned */
1407 padding = (4 - ((u4)stream & 3)) & 3;
1408 stream += padding;
1409 ChainCellCounts chainCellCounts;
1410 /* Install the chaining cell counts */
1411 for (i=0; i< kChainingCellGap; i++) {
1412 chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
1413 }
1414 char* streamCountStart = (char*)stream;
1415 memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
1416 stream += sizeof(chainCellCounts);
1417
1418 cUnit->baseAddr = streamMethodStart;
1419 cUnit->totalSize = (stream - streamStart);
1420 if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1421 ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
1422 gDvmJit.codeCacheFull = true;
1423 cUnit->baseAddr = NULL;
1424 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1425 return;
1426 }
1427
1428 /* write chaining cell count offset & chaining cell offset */
1429 u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
1430 *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
1431 pOffset[1] = streamChainingStart - streamMethodStart;
1432
1433 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1434
1435 gDvmJit.codeCacheByteUsed += (stream - streamStart);
1436 if (cUnit->printMe) {
1437 unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
1438 unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
1439 ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
1440 cUnit->method->clazz->descriptor, cUnit->method->name,
1441 codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
1442 ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
1443 cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
1444 printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
1445 }
1446 ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
1447 (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
1448 cUnit->totalSize, gDvmJit.codeCache);
1449
1450 gDvmJit.numCompilations++;
1451
1452 info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
1453 }
1454
1455 /*
1456 * Perform translation chain operation.
1457 */
1458 void* dvmJitChain(void* tgtAddr, u4* branchAddr)
1459 {
1460 #ifdef JIT_CHAIN
1461 int relOffset = (int) tgtAddr - (int)branchAddr;
1462
1463 if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
1464 (gDvmJit.codeCacheFull == false)) {
1465
1466 gDvmJit.translationChains++;
1467
1468 //OpndSize immSize = estOpndSizeFromImm(relOffset);
1469 //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
1470 /* Hard-code the jump operand size to 32 bits. This instruction will replace the "jump 0" in
1471 * the original code sequence.
1472 */
1473 OpndSize immSize = OpndSize_32;
1474 relOffset -= 5;
1475 //can't use stream here since it is used by the compilation thread
1476 UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1477 dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
1478 PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1479
1480 gDvmJit.hasNewChain = true;
1481
1482 COMPILER_TRACE_CHAINING(
1483 ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
1484 (int) branchAddr, tgtAddr, relOffset));
1485 }
1486 #endif
1487 return tgtAddr;
1488 }
1489
1490 /*
1491 * Accept the work and start compiling. Returns true if compilation
1492 * is attempted.
1493 */
1494 bool dvmCompilerDoWork(CompilerWorkOrder *work)
1495 {
1496 JitTraceDescription *desc;
1497 bool isCompile;
1498 bool success = true;
1499
1500 if (gDvmJit.codeCacheFull) {
1501 return false;
1502 }
1503
1504 switch (work->kind) {
1505 case kWorkOrderTrace:
1506 isCompile = true;
1507 /* Start compilation with maximally allowed trace length */
1508 desc = (JitTraceDescription *)work->info;
1509 success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1510 work->bailPtr, 0 /* no hints */);
1511 break;
1512 case kWorkOrderTraceDebug: {
1513 bool oldPrintMe = gDvmJit.printMe;
1514 gDvmJit.printMe = true;
1515 isCompile = true;
1516 /* Start compilation with maximally allowed trace length */
1517 desc = (JitTraceDescription *)work->info;
1518 success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1519 work->bailPtr, 0 /* no hints */);
1520 gDvmJit.printMe = oldPrintMe;
1521 break;
1522 }
1523 case kWorkOrderProfileMode:
1524 dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
1525 isCompile = false;
1526 break;
1527 default:
1528 isCompile = false;
1529 ALOGE("Jit: unknown work order type");
1530 assert(0); // Bail if debug build, discard otherwise
1531 }
1532 if (!success)
1533 work->result.codeAddress = NULL;
1534 return isCompile;
1535 }
1536
1537 void dvmCompilerCacheFlush(long start, long end, long flags) {
1538 /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
1539 }
1540
1541 //#endif
1542