1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <sys/mman.h>
17 #include "Dalvik.h"
18 #include "libdex/DexOpcodes.h"
19 #include "compiler/Compiler.h"
20 #include "compiler/CompilerIR.h"
21 #include "interp/Jit.h"
22 #include "libdex/DexFile.h"
23 #include "Lower.h"
24 #include "NcgAot.h"
25 #include "compiler/codegen/CompilerCodegen.h"
26
27 /* Init values when a predicted chain is initially assembled */
28 /* 0xE7FE is a Thumb branch-to-self encoding carried over from the ARM port; used as the initial value of the branch field */
29 #define PREDICTED_CHAIN_BX_PAIR_INIT 0xe7fe
30
31 /* Target-specific save/restore */
32 extern "C" void dvmJitCalleeSave(double *saveArea);
33 extern "C" void dvmJitCalleeRestore(double *saveArea);
34
35 /*
36 * Determine the initial instruction set to be used for this trace.
37 * Later components may decide to change this.
38 */
39 //JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
40 JitInstructionSetType dvmCompilerInstructionSet(void)
41 {
42 return DALVIK_JIT_IA32;
43 }
44
45 JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
46 {
47 return DALVIK_JIT_IA32;
48 }
49
50 /* We don't use an interpret template for IA32 */
51 void *dvmCompilerGetInterpretTemplate()
52 {
53 return NULL;
54 }
55
56 /* Track the number of times that the code cache is patched */
57 #if defined(WITH_JIT_TUNING)
58 #define UPDATE_CODE_CACHE_PATCHES() (gDvmJit.codeCachePatches++)
59 #else
60 #define UPDATE_CODE_CACHE_PATCHES()
61 #endif
62
63 bool dvmCompilerArchInit() {
64 /* Target-specific configuration */
65 gDvmJit.jitTableSize = 1 << 12;
66 gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
67 gDvmJit.threshold = 255;
68 gDvmJit.codeCacheSize = 512*1024;
69 gDvmJit.optLevel = kJitOptLevelO1;
70
71 #if defined(WITH_SELF_VERIFICATION)
72 /* Force into blocking mode */
73 gDvmJit.blockingMode = true;
74 gDvm.nativeDebuggerActive = true;
75 #endif
76
77 // Make sure all threads have current values
78 dvmJitUpdateThreadStateAll();
79
80 return true;
81 }
82
83 void dvmCompilerPatchInlineCache(void)
84 {
85 int i;
86 PredictedChainingCell *minAddr, *maxAddr;
87
88 /* Nothing to be done */
89 if (gDvmJit.compilerICPatchIndex == 0) return;
90
91 /*
92 * Since all threads are already stopped we don't really need to acquire
93 * the lock. But a race condition could easily be introduced in the future
94 * without paying attention, so we still acquire the lock here.
95 */
96 dvmLockMutex(&gDvmJit.compilerICPatchLock);
97
98 UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
99
100 //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);
101
102 /* Initialize the min/max address range */
103 minAddr = (PredictedChainingCell *)
104 ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
105 maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;
106
107 for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
108 ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
109 PredictedChainingCell *cellAddr = workOrder->cellAddr;
110 PredictedChainingCell *cellContent = &workOrder->cellContent;
111 ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
112 workOrder->classLoader);
113
114 assert(clazz->serialNumber == workOrder->serialNumber);
115
116 /* Use the newly resolved clazz pointer */
117 cellContent->clazz = clazz;
118
119 if (cellAddr->clazz == NULL) {
120 COMPILER_TRACE_CHAINING(
121 ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
122 cellAddr,
123 cellContent->clazz->descriptor,
124 cellContent->method->name));
125 } else {
126 COMPILER_TRACE_CHAINING(
127 ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
128 "patched",
129 cellAddr,
130 cellAddr->clazz->descriptor,
131 cellContent->clazz->descriptor,
132 cellContent->method->name));
133 }
134
135 /* Patch the chaining cell */
136 *cellAddr = *cellContent;
137 minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
138 maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
139 }
140
141 PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
142
143 gDvmJit.compilerICPatchIndex = 0;
144 dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
145 }
146
147 /* Target-specific cache clearing */
148 void dvmCompilerCacheClear(char *start, size_t size)
149 {
150 /* "0xFF 0xFF" is an invalid opcode for x86. */
151 memset(start, 0xFF, size);
152 }
153
154 /* for JIT debugging, to be implemented */
155 void dvmJitCalleeSave(double *saveArea) {
156 }
157
158 void dvmJitCalleeRestore(double *saveArea) {
159 }
160
161 void dvmJitToInterpSingleStep() {
162 }
163
164 JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
165 const JitEntry *knownEntry) {
166 return NULL;
167 }
168
169 void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
170 {
171 }
172
173 void dvmCompilerArchDump(void)
174 {
175 }
176
177 char *getTraceBase(const JitEntry *p)
178 {
179 return NULL;
180 }
181
182 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
183 {
184 }
185
186 void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
187 {
188 }
189
190 void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
191 {
192 // Method-based JIT not supported for x86.
193 }
194
195 void dvmJitScanAllClassPointers(void (*callback)(void *))
196 {
197 }
198
199 /* Handy function to retrieve the profile count */
200 static inline int getProfileCount(const JitEntry *entry)
201 {
202 if (entry->dPC == 0 || entry->codeAddress == 0)
203 return 0;
204 u4 *pExecutionCount = (u4 *) getTraceBase(entry);
205
206 return pExecutionCount ? *pExecutionCount : 0;
207 }
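/*
 * Note: on IA32 getTraceBase() above always returns NULL, so this helper
 * currently always yields 0 and the dump in
 * dvmCompilerSortAndPrintTraceProfiles() always reports an average
 * execution count of 0.
 */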
208
209 /* qsort callback function */
210 static int sortTraceProfileCount(const void *entry1, const void *entry2)
211 {
212 const JitEntry *jitEntry1 = (const JitEntry *)entry1;
213 const JitEntry *jitEntry2 = (const JitEntry *)entry2;
214
215 JitTraceCounter_t count1 = getProfileCount(jitEntry1);
216 JitTraceCounter_t count2 = getProfileCount(jitEntry2);
217 return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
218 }
219
220 /* Sort the trace profile counts and dump them */
221 void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
222 {
223 JitEntry *sortedEntries;
224 int numTraces = 0;
225 unsigned long counts = 0;
226 unsigned int i;
227
228 /* Make sure that the table is not changing */
229 dvmLockMutex(&gDvmJit.tableLock);
230
231 /* Sort the entries by descending order */
232 sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
233 if (sortedEntries == NULL)
234 goto done;
235 memcpy(sortedEntries, gDvmJit.pJitEntryTable,
236 sizeof(JitEntry) * gDvmJit.jitTableSize);
237 qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
238 sortTraceProfileCount);
239
240 /* Dump the sorted entries */
241 for (i=0; i < gDvmJit.jitTableSize; i++) {
242 if (sortedEntries[i].dPC != 0) {
243 numTraces++;
244 }
245 }
246 if (numTraces == 0)
247 numTraces = 1;
248 ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));
249
250 free(sortedEntries);
251 done:
252 dvmUnlockMutex(&gDvmJit.tableLock);
253 return;
254 }
255
256 void jumpWithRelOffset(char* instAddr, int relOffset) {
257 stream = instAddr;
258 OpndSize immSize = estOpndSizeFromImm(relOffset);
259 relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
260 dump_imm(Mnemonic_JMP, immSize, relOffset);
261 }
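/*
 * Note: the caller passes relOffset measured from the start of the jump
 * instruction (instAddr); x86 encodes branch displacements relative to the
 * end of the instruction, hence the subtraction of the estimated instruction
 * size before emitting. For example, a jump to the byte immediately after a
 * 2-byte "jmp rel8" is emitted with a displacement of 0.
 */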
262
263 // Works whether or not the instructions for the target basic block have been generated yet
264 LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
265 stream = instAddr;
266 bool unknown;
267 OpndSize size;
268 int relativeNCG = targetId;
269 relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
270 unconditional_jump_int(relativeNCG, size);
271 return NULL;
272 }
273
274 LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
275 stream = instAddr;
276 bool unknown;
277 OpndSize size;
278 int relativeNCG = targetId;
279 relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
280 conditional_jump_int(cc, relativeNCG, size);
281 return NULL;
282 }
283
284 /*
285 * Attempt to enqueue a work order to patch an inline cache for a predicted
286 * chaining cell for virtual/interface calls.
287 */
288 static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
289 PredictedChainingCell *newContent)
290 {
291 bool result = true;
292
293 /*
294 * Make sure only one thread gets here since updating the cell (i.e. the fast
295 * path) and queueing the request (i.e. the queued path) have to be done
296 * in an atomic fashion.
297 */
298 dvmLockMutex(&gDvmJit.compilerICPatchLock);
299
300 /* Fast path for uninitialized chaining cell */
301 if (cellAddr->clazz == NULL &&
302 cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
303 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
304
305 cellAddr->method = newContent->method;
306 cellAddr->branch = newContent->branch;
307 cellAddr->branch2 = newContent->branch2;
308
309 /*
310 * The update order matters - make sure clazz is updated last since it
311 * will bring the uninitialized chaining cell to life.
312 */
313 android_atomic_release_store((int32_t)newContent->clazz,
314 (volatile int32_t *)(void*) &cellAddr->clazz);
315 //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
316 UPDATE_CODE_CACHE_PATCHES();
317
318 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
319
320 #if 0
321 MEM_BARRIER();
322 cellAddr->clazz = newContent->clazz;
323 //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
324 #endif
325 #if defined(IA_JIT_TUNING)
326 gDvmJit.icPatchInit++;
327 #endif
328 COMPILER_TRACE_CHAINING(
329 ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
330 cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
331 /* Check if this is a frequently missed clazz */
332 } else if (cellAddr->stagedClazz != newContent->clazz) {
333 /* Not proven to be frequent yet - build up the filter cache */
334 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
335
336 cellAddr->stagedClazz = newContent->clazz;
337
338 UPDATE_CODE_CACHE_PATCHES();
339 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
340
341 #if defined(WITH_JIT_TUNING)
342 gDvmJit.icPatchRejected++;
343 #endif
344 /*
345 * Different classes but same method implementation - it is safe to just
346 * patch the class value without the need to stop the world.
347 */
348 } else if (cellAddr->method == newContent->method) {
349 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
350
351 cellAddr->clazz = newContent->clazz;
352 /* No need to flush the cache here since the branch is not patched */
353 UPDATE_CODE_CACHE_PATCHES();
354
355 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
356
357 #if defined(WITH_JIT_TUNING)
358 gDvmJit.icPatchLockFree++;
359 #endif
360 /*
361 * Cannot patch the chaining cell inline - queue it until the next safe
362 * point.
363 */
364 } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
365 int index = gDvmJit.compilerICPatchIndex++;
366 const ClassObject *clazz = newContent->clazz;
367
368 gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
369 gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
370 gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
371 gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
372 /* For verification purpose only */
373 gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;
374
375 #if defined(WITH_JIT_TUNING)
376 gDvmJit.icPatchQueued++;
377 #endif
378 COMPILER_TRACE_CHAINING(
379 ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
380 cellAddr, newContent->clazz->descriptor, newContent->method->name));
381 } else {
382 /* Queue is full - just drop this patch request */
383 #if defined(WITH_JIT_TUNING)
384 gDvmJit.icPatchDropped++;
385 #endif
386
387 COMPILER_TRACE_CHAINING(
388 ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
389 cellAddr, newContent->clazz->descriptor, newContent->method->name));
390 }
391
392 dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
393 return result;
394 }
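/*
 * Summary of the four cases above: (1) a still-uninitialized cell is patched
 * in place, with clazz stored last via a release store so a racing reader
 * never sees a live cell with stale method/branch fields; (2) a class that
 * has not yet repeated is only recorded in stagedClazz (the filter);
 * (3) a different class resolving to the same method can be patched
 * lock-free because the branch target does not change; (4) everything else
 * is queued and applied at the next safe point by
 * dvmCompilerPatchInlineCache().
 */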
395
396 /*
397 * This method is called from the invoke templates for virtual and interface
398 * methods to speculatively set up a chain to the callee. The templates are
399 * written in assembly and have set up method, cell, and clazz in r0, r2, and
400 * r3 respectively, so there is an unused argument in the list. Upon return one
401 * of the following three results may happen:
402 * 1) The chain is not set up because the callee is native. Reset the rechain
403 * count to a big number so that it takes a long time before the next
404 * rechain attempt happens.
405 * 2) The chain is not set up because the callee has not been compiled yet.
406 * Reset the rechain count to a small number and retry in the near future.
407 * 3) Ask all other threads to stop before patching this chaining cell.
408 * This is required because another thread may have passed the class check
409 * but has not yet reached the chaining cell to follow the chain. If we
410 * patched the content before halting the other thread, there would be a
411 * small window in which that thread could follow the new but wrong chain
412 * and invoke a different method.
413 */
414 const Method *dvmJitToPatchPredictedChain(const Method *method,
415 Thread *self,
416 PredictedChainingCell *cell,
417 const ClassObject *clazz)
418 {
419 int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
420 /* Don't come back here for a long time if the method is native */
421 if (dvmIsNativeMethod(method)) {
422 UNPROTECT_CODE_CACHE(cell, sizeof(*cell));
423
424 /*
425 * Put a non-zero/bogus value in the clazz field so that it won't
426 * trigger immediate patching and will continue to fail to match with
427 * a real clazz pointer.
428 */
429 cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;
430
431 UPDATE_CODE_CACHE_PATCHES();
432 PROTECT_CODE_CACHE(cell, sizeof(*cell));
433 COMPILER_TRACE_CHAINING(
434 ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
435 cell, method->name));
436 goto done;
437 }
438 {
439 int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);
440
441 /*
442 * The callee has not been compiled yet. Reset the counter to a small
443 * value and come back to check soon.
444 */
445 if ((tgtAddr == 0) ||
446 ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
447 COMPILER_TRACE_CHAINING(
448 ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
449 cell, method->clazz->descriptor, method->name));
450 goto done;
451 }
452
453 PredictedChainingCell newCell;
454
455 if (cell->clazz == NULL) {
456 newRechainCount = self->icRechainCount;
457 }
458
459 int relOffset = (int) tgtAddr - (int)cell;
460 OpndSize immSize = estOpndSizeFromImm(relOffset);
461 int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
462 relOffset -= jumpSize;
463 COMPILER_TRACE_CHAINING(
464 ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
465 cell, method->clazz->descriptor, method->name, jumpSize));
466 //can't use stream here since it is used by the compilation thread
467 dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch
468
469 newCell.clazz = clazz;
470 newCell.method = method;
471
472 /*
473 * Enqueue the work order; the chaining cell will be patched the next time
474 * a safe point is entered.
475 *
476 * If the enqueuing fails, reset the rechain count to a normal value so that
477 * it won't get indefinitely delayed.
478 */
479 inlineCachePatchEnqueue(cell, &newCell);
480 }
481 done:
482 self->icRechainCount = newRechainCount;
483 return method;
484 }
485
486 /*
487 * Unchain a trace given the starting address of the translation
488 * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR.
489 * For ARM, it returns the address following the last cell unchained.
490 * For IA, it returns NULL since cacheflush is not required for IA.
491 */
492 u4* dvmJitUnchain(void* codeAddr)
493 {
494 /* codeAddr is 4-byte aligned, so is chain cell count offset */
495 u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
496 u2 chainCellCountOffset = *pChainCellCountOffset;
497 /* chain cell counts information is 4-byte aligned */
498 ChainCellCounts *pChainCellCounts =
499 (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
500 u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
501 u2 chainCellOffset = *pChainCellOffset;
502 u1* pChainCells;
503 int i,j;
504 PredictedChainingCell *predChainCell;
505 int padding;
506
507 /* Locate the beginning of the chain cell region */
508 pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);
509
510 /* The cells are sorted in order - walk through them and reset */
511 for (i = 0; i < kChainingCellGap; i++) {
512 /* for hot, normal, singleton chaining:
513 nop //padding.
514 jmp 0
515 mov imm32, reg1
516 mov imm32, reg2
517 call reg2
518 after chaining:
519 nop
520 jmp imm
521 mov imm32, reg1
522 mov imm32, reg2
523 call reg2
524 after unchaining:
525 nop
526 jmp 0
527 mov imm32, reg1
528 mov imm32, reg2
529 call reg2
530 Space occupied by the chaining cell in bytes: the nops are for padding,
531 so that the 4-byte target of "jmp 0" is 4-byte aligned.
532 Space for predicted chaining: 5 words = 20 bytes
533 */
534 int elemSize = 0;
535 if (i == kChainingCellInvokePredicted) {
536 elemSize = 20;
537 }
538 COMPILER_TRACE_CHAINING(
539 ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));
540
541 for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
542 switch(i) {
543 case kChainingCellNormal:
544 case kChainingCellHot:
545 case kChainingCellInvokeSingleton:
546 case kChainingCellBackwardBranch:
547 COMPILER_TRACE_CHAINING(
548 ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
549 pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
550 elemSize = 4+5+5+2;
551 memset(pChainCells, 0, 4);
552 break;
553 case kChainingCellInvokePredicted:
554 COMPILER_TRACE_CHAINING(
555 ALOGI("Jit Runtime: unchaining of predicted"));
556 /* 4-byte aligned */
557 padding = (4 - ((u4)pChainCells & 3)) & 3;
558 pChainCells += padding;
559 predChainCell = (PredictedChainingCell *) pChainCells;
560 /*
561 * Another mutator thread may be racing to use this
562 * particular predicted cell and may have already passed
563 * the clazz comparison. So we cannot safely wipe the
564 * method and branch, but it is safe to clear the clazz,
565 * which serves as the key.
566 */
567 predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
568 break;
569 default:
570 ALOGE("Unexpected chaining type: %d", i);
571 dvmAbort(); // dvmAbort OK here - can't safely recover
572 }
573 COMPILER_TRACE_CHAINING(
574 ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
575 pChainCells += elemSize; /* Advance by a fixed number of bytes */
576 }
577 }
578 return NULL;
579 }
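/*
 * Note: the two u2 values read above (at codeAddr - 4 and codeAddr - 2) are
 * the chaining-cell-count offset and the chaining-cell offset that
 * dvmCompilerMIR2LIR() stores in the four bytes reserved via
 * EXTRA_BYTES_FOR_CHAINING just before the 16-byte-aligned code start; see
 * the pOffset writes near the end of that function.
 */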
580
581 /* Unchain all translations in the cache. */
582 void dvmJitUnchainAll()
583 {
584 ALOGV("Jit Runtime: unchaining all");
585 if (gDvmJit.pJitEntryTable != NULL) {
586 COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
587 dvmLockMutex(&gDvmJit.tableLock);
588
589 UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
590
591 for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
592 if (gDvmJit.pJitEntryTable[i].dPC &&
593 !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
594 gDvmJit.pJitEntryTable[i].codeAddress) {
595 dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
596 }
597 }
598
599 PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
600
601 dvmUnlockMutex(&gDvmJit.tableLock);
602 gDvmJit.translationChains = 0;
603 }
604 gDvmJit.hasNewChain = false;
605 }
606
607 #define P_GPR_1 PhysicalReg_EBX
608 /* Add an additional jump instruction, keeping the jump target 4-byte aligned. */
609 static void insertJumpHelp()
610 {
611 int rem = (uint)stream % 4;
612 int nop_size = 3 - rem;
613 dump_nop(nop_size);
614 unconditional_jump_int(0, OpndSize_32);
615 return;
616 }
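/*
 * Alignment sketch: if stream % 4 == rem, emitting (3 - rem) nops leaves the
 * 1-byte JMP opcode at offset 3 (mod 4), so the 32-bit displacement that
 * follows starts on a 4-byte boundary. dvmJitChain() later overwrites that
 * displacement when it replaces the "jmp 0", and the alignment keeps the
 * 4-byte displacement update inside a single aligned word while other
 * threads may be executing around the cell.
 */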
617
618 /* Chaining cell for code that may need warmup. */
619 /* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
620 blx r0
621 data 0xb23a //bytecode address: 0x5115b23a
622 data 0x5115
623 IA32 assembly:
624 jmp 0 //5 bytes
625 movl address, %ebx
626 movl dvmJitToInterpNormal, %eax
627 call %eax
628 <-- return address
629 */
630 static void handleNormalChainingCell(CompilationUnit *cUnit,
631 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
632 {
633 ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
634 cUnit->method->name, blockId, offset, stream - streamMethodStart);
635 if(dump_x86_inst)
636 ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
637 offset, stream - streamMethodStart, stream);
638 /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
639 * resolve the multithreading issue.
640 */
641 insertJumpHelp();
642 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
643 scratchRegs[0] = PhysicalReg_EAX;
644 call_dvmJitToInterpNormal();
645 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
646 }
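/*
 * Rough shape of the cell emitted above (matches the IA32 assembly comment
 * earlier in this file): 0-3 nops, "jmp 0" (5 bytes, rewritten by
 * dvmJitChain once a target exists), "movl rPC, %ebx",
 * "movl dvmJitToInterpNormal, %eax", "call *%eax".
 */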
647
648 /*
649 * Chaining cell for instructions that immediately follow already-translated
650 * code.
651 */
652 static void handleHotChainingCell(CompilationUnit *cUnit,
653 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
654 {
655 ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
656 cUnit->method->name, blockId, offset, stream - streamMethodStart);
657 if(dump_x86_inst)
658 ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
659 offset, stream - streamMethodStart, stream);
660 /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
661 * resolve the multithreading issue.
662 */
663 insertJumpHelp();
664 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
665 scratchRegs[0] = PhysicalReg_EAX;
666 call_dvmJitToInterpTraceSelect();
667 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
668 }
669
670 /* Chaining cell for branches that branch back into the same basic block */
671 static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
672 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
673 {
674 ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
675 cUnit->method->name, blockId, offset, stream - streamMethodStart);
676 if(dump_x86_inst)
677 ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
678 offset, stream - streamMethodStart, stream);
679 /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
680 * resolve the multithreading issue.
681 */
682 insertJumpHelp();
683 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
684 scratchRegs[0] = PhysicalReg_EAX;
685 call_dvmJitToInterpNormal();
686 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
687 }
688
689 /* Chaining cell for monomorphic method invocations. */
690 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
691 const Method *callee, int blockId, LowOpBlockLabel* labelList)
692 {
693 ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
694 cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
695 if(dump_x86_inst)
696 ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
697 blockId, stream - streamMethodStart, stream);
698 /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
699 * resolve the multithreading issue.
700 */
701 insertJumpHelp();
702 move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
703 scratchRegs[0] = PhysicalReg_EAX;
704 call_dvmJitToInterpTraceSelect();
705 //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
706 }
707 #undef P_GPR_1
708
709 /* Chaining cell for polymorphic (predicted) method invocations. */
710 static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
711 {
712 if(dump_x86_inst)
713 ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
714 blockId, stream - streamMethodStart, stream);
715 #ifndef PREDICTED_CHAINING
716 //assume rPC for callee->insns in %ebx
717 scratchRegs[0] = PhysicalReg_EAX;
718 call_dvmJitToInterpTraceSelectNoChain();
719 #else
720 /* make sure the section for the predicted chaining cell is 4-byte aligned */
721 //int padding = (4 - ((u4)stream & 3)) & 3;
722 //stream += padding;
723 int* streamData = (int*)stream;
724 /* Should not be executed in the initial state */
725 streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
726 streamData[1] = 0;
727 /* To be filled: class */
728 streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
729 /* To be filled: method */
730 streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
731 /*
732 * Rechain count. The initial value of 0 here will trigger chaining upon
733 * the first invocation of this callsite.
734 */
735 streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
736 #if 0
737 ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
738 *((int*)(stream+8)), *((int*)(stream+12)));
739 #endif
740 stream += 20; //5 *4
741 #endif
742 }
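/*
 * Sketch of the 5-word (20-byte) predicted cell laid out above (field names
 * assumed from what inlineCachePatchEnqueue() patches):
 *   word 0: branch   (PREDICTED_CHAIN_BX_PAIR_INIT)
 *   word 1: branch2  (0)
 *   word 2: clazz    (PREDICTED_CHAIN_CLAZZ_INIT, the comparison key)
 *   word 3: method   (PREDICTED_CHAIN_METHOD_INIT)
 *   word 4: rechain counter (PREDICTED_CHAIN_COUNTER_INIT)
 * This matches the elemSize of 20 used when walking predicted cells in
 * dvmJitUnchain().
 */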
743
744 /* Load the Dalvik PC into r0 and jump to the specified target */
745 static void handlePCReconstruction(CompilationUnit *cUnit,
746 LowOpBlockLabel *targetLabel)
747 {
748 #if 0
749 LowOp **pcrLabel =
750 (LowOp **) cUnit->pcReconstructionList.elemList;
751 int numElems = cUnit->pcReconstructionList.numUsed;
752 int i;
753 for (i = 0; i < numElems; i++) {
754 dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
755 /* r0 = dalvik PC */
756 loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
757 genUnconditionalBranch(cUnit, targetLabel);
758 }
759 #endif
760 }
761
762 //use O0 code generator for hoisted checks outside of the loop
763 /*
764 * vA = arrayReg;
765 * vB = idxReg;
766 * vC = endConditionReg;
767 * arg[0] = maxC
768 * arg[1] = minC
769 * arg[2] = loopBranchConditionCode
770 */
771 #define P_GPR_1 PhysicalReg_EBX
772 #define P_GPR_2 PhysicalReg_ECX
773 static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
774 {
775 /*
776 * NOTE: these synthesized blocks don't have ssa names assigned
777 * for Dalvik registers. However, because they dominate the following
778 * blocks we can simply use the Dalvik name w/ subscript 0 as the
779 * ssa name.
780 */
781 DecodedInstruction *dInsn = &mir->dalvikInsn;
782 const int maxC = dInsn->arg[0];
783
784 /* assign array in virtual register to P_GPR_1 */
785 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
786 /* assign index in virtual register to P_GPR_2 */
787 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
788 export_pc();
789 compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
790 condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
791 int delta = maxC;
792 /*
793 * If the loop end condition is ">=" instead of ">", then the largest value
794 * of the index is "endCondition - 1".
795 */
796 if (dInsn->arg[2] == OP_IF_GE) {
797 delta--;
798 }
799
800 if (delta < 0) { //+delta
801 //if P_GPR_2 is mapped to a VR, we can't do this
802 alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
803 } else if(delta > 0) {
804 alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
805 }
806 compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
807 condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
808 }
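/*
 * In words: the hoisted check above null-checks the array, biases the copy
 * of the end-condition value in P_GPR_2 by maxC (minus one when the loop
 * exits on ">="), and compares the result against the array length, punting
 * to the exception block if the largest index the loop can touch would be
 * out of bounds.
 */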
809
810 /*
811 * vA = arrayReg;
812 * vB = idxReg;
813 * vC = endConditionReg;
814 * arg[0] = maxC
815 * arg[1] = minC
816 * arg[2] = loopBranchConditionCode
817 */
818 static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
819 {
820 DecodedInstruction *dInsn = &mir->dalvikInsn;
821 const int maxC = dInsn->arg[0];
822
823 /* assign array in virtual register to P_GPR_1 */
824 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
825 /* assign index in virtual register to P_GPR_2 */
826 get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
827 export_pc();
828 compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
829 condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
830
831 if (maxC < 0) {
832 //if P_GPR_2 is mapped to a VR, we can't do this
833 alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
834 } else if(maxC > 0) {
835 alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
836 }
837 compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
838 condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
839
840 }
841 #undef P_GPR_1
842 #undef P_GPR_2
843
844 /*
845 * vA = idxReg;
846 * vB = minC;
847 */
848 #define P_GPR_1 PhysicalReg_ECX
849 static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
850 {
851 DecodedInstruction *dInsn = &mir->dalvikInsn;
852 const int minC = dInsn->vB;
853 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //array
854 export_pc();
855 compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
856 condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
857 }
858 #undef P_GPR_1
859
860 #ifdef WITH_JIT_INLINING
861 static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
862 {
863 CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
864 if(gDvm.executionMode == kExecutionModeNcgO0) {
865 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
866 move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
867 compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
868 export_pc(); //use %edx
869 conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
870 move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
871 compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
872 } else {
873 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
874 move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
875 nullCheck(5, false, 1, mir->dalvikInsn.vC);
876 move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
877 compare_reg_reg(4, false, 6, false);
878 }
879
880 //immediate will be updated later in genLandingPadForMispredictedCallee
881 streamMisPred = stream;
882 callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
883 }
884 #endif
885
886 /* Extended MIR instructions like PHI */
887 void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
888 {
889 ExecutionMode origMode = gDvm.executionMode;
890 gDvm.executionMode = kExecutionModeNcgO0;
891 switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
892 case kMirOpPhi: {
893 break;
894 }
895 case kMirOpNullNRangeUpCheck: {
896 genHoistedChecksForCountUpLoop(cUnit, mir);
897 break;
898 }
899 case kMirOpNullNRangeDownCheck: {
900 genHoistedChecksForCountDownLoop(cUnit, mir);
901 break;
902 }
903 case kMirOpLowerBound: {
904 genHoistedLowerBoundCheck(cUnit, mir);
905 break;
906 }
907 case kMirOpPunt: {
908 break;
909 }
910 #ifdef WITH_JIT_INLINING
911 case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
912 genValidationForPredictedInline(cUnit, mir);
913 break;
914 }
915 #endif
916 default:
917 break;
918 }
919 gDvm.executionMode = origMode;
920 }
921
922 static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
923 int bodyId)
924 {
925 /*
926 * Next, create two branches - one branch over to the loop body and the
927 * other branch to the PCR cell to punt.
928 */
929 //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
930 //setupResourceMasks(branchToBody);
931 //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);
932
933 #if 0
934 LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
935 branchToPCR->opCode = kThumbBUncond;
936 branchToPCR->generic.target = (LIR *) pcrLabel;
937 setupResourceMasks(branchToPCR);
938 cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
939 #endif
940 }
941
942 /* Check whether we can merge the given block with its taken (branch target) block */
943 bool mergeBlock(BasicBlock *bb) {
944 if(bb->blockType == kDalvikByteCode &&
945 bb->firstMIRInsn != NULL &&
946 (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
947 bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
948 bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
949 bb->fallThrough == NULL) {// &&
950 //cUnit->hasLoop) {
951 //ALOGI("merge blocks ending with goto at index %d", i);
952 MIR* prevInsn = bb->lastMIRInsn->prev;
953 if(bb->taken == NULL) return false;
954 MIR* mergeInsn = bb->taken->firstMIRInsn;
955 if(mergeInsn == NULL) return false;
956 if(prevInsn == NULL) {//the block has a single instruction
957 bb->firstMIRInsn = mergeInsn;
958 } else {
959 prevInsn->next = mergeInsn; //remove goto from the chain
960 }
961 mergeInsn->prev = prevInsn;
962 bb->lastMIRInsn = bb->taken->lastMIRInsn;
963 bb->taken->firstMIRInsn = NULL; //block being merged in
964 bb->fallThrough = bb->taken->fallThrough;
965 bb->taken = bb->taken->taken;
966 return true;
967 }
968 return false;
969 }
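/*
 * In words: when a bytecode block ends in an unconditional goto and has no
 * fall-through, the goto is dropped and the MIRs of the taken block are
 * spliced onto this block, which also inherits that block's successors.
 * The caller in dvmCompilerMIR2LIR() re-applies this until no further merge
 * happens, so whole goto chains collapse into one block.
 */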
970
971 static int genTraceProfileEntry(CompilationUnit *cUnit)
972 {
973 cUnit->headerSize = 6;
974 if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
975 (gDvmJit.profileMode == kTraceProfilingDisabled)) {
976 return 12;
977 } else {
978 return 4;
979 }
980
981 }
982
983 #define PRINT_BUFFER_LEN 1024
984 /* Print the code block in the code cache in the range [startAddr, endAddr)
985 * in readable format.
986 */
987 void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
988 {
989 char strbuf[PRINT_BUFFER_LEN];
990 unsigned char *addr;
991 unsigned char *next_addr;
992 int n;
993
994 if (gDvmJit.printBinary) {
995 // print binary in bytes
996 n = 0;
997 for (addr = startAddr; addr < endAddr; addr++) {
998 n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
999 if (n > PRINT_BUFFER_LEN - 10) {
1000 ALOGD("## %s", strbuf);
1001 n = 0;
1002 }
1003 }
1004 if (n > 0)
1005 ALOGD("## %s", strbuf);
1006 }
1007
1008 // print disassembled instructions
1009 addr = startAddr;
1010 while (addr < endAddr) {
1011 next_addr = reinterpret_cast<unsigned char*>
1012 (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
1013 strbuf, PRINT_BUFFER_LEN));
1014 if (addr != next_addr) {
1015 ALOGD("** %p: %s", addr, strbuf);
1016 } else { // check whether this is nop padding
1017 if (addr[0] == 0x90) {
1018 ALOGD("** %p: NOP (1 byte)", addr);
1019 next_addr += 1;
1020 } else if (addr[0] == 0x66 && addr[1] == 0x90) {
1021 ALOGD("** %p: NOP (2 bytes)", addr);
1022 next_addr += 2;
1023 } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
1024 ALOGD("** %p: NOP (3 bytes)", addr);
1025 next_addr += 3;
1026 } else {
1027 ALOGD("** unable to decode binary at %p", addr);
1028 break;
1029 }
1030 }
1031 addr = next_addr;
1032 }
1033 }
1034
1035 /* 4 is the number of additional bytes needed for chaining information for trace:
1036 * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */
1037 #define EXTRA_BYTES_FOR_CHAINING 4
1038
1039 /* Entry function to invoke the backend of the JIT compiler */
1040 void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
1041 {
1042 dump_x86_inst = cUnit->printMe;
1043 /* Used to hold the labels of each block */
1044 LowOpBlockLabel *labelList =
1045 (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
1046 LowOp *headLIR = NULL;
1047 GrowableList chainingListByType[kChainingCellLast];
1048 unsigned int i, padding;
1049
1050 /*
1051 * Initialize the chaining cell lists, one per chaining cell type.
1052 */
1053 for (i = 0; i < kChainingCellLast; i++) {
1054 dvmInitGrowableList(&chainingListByType[i], 2);
1055 }
1056
1057 /* Clear the visited flag for each block */
1058 dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
1059 kAllNodes, false /* isIterative */);
1060
1061 GrowableListIterator iterator;
1062 dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
1063
1064 /* Traces start with a profiling entry point. Generate it here */
1065 cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
1066
1067 //BasicBlock **blockList = cUnit->blockList;
1068 GrowableList *blockList = &cUnit->blockList;
1069 BasicBlock *bb;
1070
1071 info->codeAddress = NULL;
1072 stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
1073
1074 // TODO: compile into a temporary buffer and then copy into the code cache.
1075 // That would let us leave the code cache unprotected for a shorter time.
1076 size_t unprotected_code_cache_bytes =
1077 gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
1078 UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1079
1080 streamStart = stream; /* trace start before alignment */
1081 stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
1082 stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
1083 streamMethodStart = stream; /* code start */
1084 for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
1085 labelList[i].lop.generic.offset = -1;
1086 }
1087 cUnit->exceptionBlockId = -1;
1088 for (i = 0; i < blockList->numUsed; i++) {
1089 bb = (BasicBlock *) blockList->elemList[i];
1090 if(bb->blockType == kExceptionHandling)
1091 cUnit->exceptionBlockId = i;
1092 }
1093 startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
1094 if(gDvm.executionMode == kExecutionModeNcgO1) {
1095 //merge blocks ending with "goto" with the fall through block
1096 if (cUnit->jitMode != kJitLoop)
1097 for (i = 0; i < blockList->numUsed; i++) {
1098 bb = (BasicBlock *) blockList->elemList[i];
1099 bool merged = mergeBlock(bb);
1100 while(merged) merged = mergeBlock(bb);
1101 }
1102 for (i = 0; i < blockList->numUsed; i++) {
1103 bb = (BasicBlock *) blockList->elemList[i];
1104 if(bb->blockType == kDalvikByteCode &&
1105 bb->firstMIRInsn != NULL) {
1106 preprocessingBB(bb);
1107 }
1108 }
1109 preprocessingTrace();
1110 }
1111
1112 /* Handle the content in each basic block */
1113 for (i = 0; ; i++) {
1114 MIR *mir;
1115 bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
1116 if (bb == NULL) break;
1117 if (bb->visited == true) continue;
1118
1119 labelList[i].immOpnd.value = bb->startOffset;
1120
1121 if (bb->blockType >= kChainingCellLast) {
1122 /*
1123 * Append the label pseudo LIR first. Chaining cells will be handled
1124 * separately afterwards.
1125 */
1126 dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
1127 }
1128
1129 if (bb->blockType == kEntryBlock) {
1130 labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
1131 if (bb->firstMIRInsn == NULL) {
1132 continue;
1133 } else {
1134 setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
1135 //&labelList[blockList[i]->fallThrough->id]);
1136 }
1137 } else if (bb->blockType == kExitBlock) {
1138 labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
1139 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1140 goto gen_fallthrough;
1141 } else if (bb->blockType == kDalvikByteCode) {
1142 if (bb->hidden == true) continue;
1143 labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
1144 /* Reset the register state */
1145 #if 0
1146 resetRegisterScoreboard(cUnit);
1147 #endif
1148 } else {
1149 switch (bb->blockType) {
1150 case kChainingCellNormal:
1151 labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
1152 /* handle the codegen later */
1153 dvmInsertGrowableList(
1154 &chainingListByType[kChainingCellNormal], i);
1155 break;
1156 case kChainingCellInvokeSingleton:
1157 labelList[i].lop.opCode2 =
1158 ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
1159 labelList[i].immOpnd.value =
1160 (int) bb->containingMethod;
1161 /* handle the codegen later */
1162 dvmInsertGrowableList(
1163 &chainingListByType[kChainingCellInvokeSingleton], i);
1164 break;
1165 case kChainingCellInvokePredicted:
1166 labelList[i].lop.opCode2 =
1167 ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
1168 /*
1169 * Move the cached method pointer from operand 1 to 0.
1170 * Operand 0 was clobbered earlier in this routine to store
1171 * the block starting offset, which is not applicable to
1172 * predicted chaining cell.
1173 */
1174 //TODO
1175 //labelList[i].operands[0] = labelList[i].operands[1];
1176
1177 /* handle the codegen later */
1178 dvmInsertGrowableList(
1179 &chainingListByType[kChainingCellInvokePredicted], i);
1180 break;
1181 case kChainingCellHot:
1182 labelList[i].lop.opCode2 =
1183 ATOM_PSEUDO_CHAINING_CELL_HOT;
1184 /* handle the codegen later */
1185 dvmInsertGrowableList(
1186 &chainingListByType[kChainingCellHot], i);
1187 break;
1188 case kPCReconstruction:
1189 /* Make sure exception handling block is next */
1190 labelList[i].lop.opCode2 =
1191 ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
1192 //assert (i == cUnit->numBlocks - 2);
1193 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1194 handlePCReconstruction(cUnit,
1195 &labelList[cUnit->puntBlock->id]);
1196 break;
1197 case kExceptionHandling:
1198 labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
1199 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1200 //if (cUnit->pcReconstructionList.numUsed) {
1201 scratchRegs[0] = PhysicalReg_EAX;
1202 jumpToInterpPunt();
1203 //call_dvmJitToInterpPunt();
1204 //}
1205 break;
1206 case kChainingCellBackwardBranch:
1207 labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
1208 /* handle the codegen later */
1209 dvmInsertGrowableList(
1210 &chainingListByType[kChainingCellBackwardBranch],
1211 i);
1212 break;
1213 default:
1214 break;
1215 }
1216 continue;
1217 }
1218 {
1219 //LowOp *headLIR = NULL;
1220 const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
1221 const u2 *startCodePtr = dexCode->insns;
1222 const u2 *codePtr;
1223 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1224 ALOGV("get ready to handle JIT bb %d type %d hidden %d",
1225 bb->id, bb->blockType, bb->hidden);
1226 for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
1227 bb = nextBB;
1228 bb->visited = true;
1229 cUnit->nextCodegenBlock = NULL;
1230
1231 if(gDvm.executionMode == kExecutionModeNcgO1 &&
1232 bb->blockType != kEntryBlock &&
1233 bb->firstMIRInsn != NULL) {
1234 startOfBasicBlock(bb);
1235 int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
1236 endOfBasicBlock(bb);
1237 if(cg_ret < 0) {
1238 endOfTrace(true/*freeOnly*/);
1239 cUnit->baseAddr = NULL;
1240 ALOGI("codeGenBasicBlockJit returns negative number");
1241 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1242 return;
1243 }
1244 } else {
1245 for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
1246 startOfBasicBlock(bb); //why here for O0
1247 Opcode dalvikOpCode = mir->dalvikInsn.opcode;
1248 if((int)dalvikOpCode >= (int)kMirOpFirst) {
1249 handleExtendedMIR(cUnit, mir);
1250 continue;
1251 }
1252 InstructionFormat dalvikFormat =
1253 dexGetFormatFromOpcode(dalvikOpCode);
1254 ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
1255 mir->offset, dalvikOpCode, dalvikFormat);
1256 LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
1257 /* Remember the first LIR for this block */
1258 if (headLIR == NULL) {
1259 headLIR = (LowOp*)boundaryLIR;
1260 }
1261 bool notHandled = true;
1262 /*
1263 * Debugging: screen the opcode first to see if it is in the
1264 * do[-not]-compile list
1265 */
1266 bool singleStepMe =
1267 gDvmJit.includeSelectedOp !=
1268 ((gDvmJit.opList[dalvikOpCode >> 3] &
1269 (1 << (dalvikOpCode & 0x7))) !=
1270 0);
1271 if (singleStepMe || cUnit->allSingleStep) {
1272 } else {
1273 codePtr = startCodePtr + mir->offset;
1274 //lower each byte code, update LIR
1275 notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
1276 if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
1277 CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1278 ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
1279 gDvmJit.codeCacheFull = true;
1280 cUnit->baseAddr = NULL;
1281 endOfTrace(true/*freeOnly*/);
1282 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1283 return;
1284 }
1285 }
1286 if (notHandled) {
1287 ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
1288 mir->offset,
1289 dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
1290 dalvikFormat);
1291 dvmAbort();
1292 break;
1293 }
1294 } // end for
1295 } // end else //JIT + O0 code generator
1296 }
1297 } // end for
1298 /* Eliminate redundant loads/stores and delay stores into later slots */
1299 #if 0
1300 dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
1301 cUnit->lastLIRInsn);
1302 #endif
1303 if (headLIR) headLIR = NULL;
1304 gen_fallthrough:
1305 /*
1306 * Check if the block is terminated due to trace length constraint -
1307 * insert an unconditional branch to the chaining cell.
1308 */
1309 if (bb->needFallThroughBranch) {
1310 jumpToBasicBlock(stream, bb->fallThrough->id);
1311 }
1312
1313 }
1314
1315 char* streamChainingStart = (char*)stream;
1316 /* Handle the chaining cells in predefined order */
1317 for (i = 0; i < kChainingCellGap; i++) {
1318 size_t j;
1319 int *blockIdList = (int *) chainingListByType[i].elemList;
1320
1321 cUnit->numChainingCells[i] = chainingListByType[i].numUsed;
1322
1323 /* No chaining cells of this type */
1324 if (cUnit->numChainingCells[i] == 0)
1325 continue;
1326
1327 /* Record the first LIR for a new type of chaining cell */
1328 cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
1329 for (j = 0; j < chainingListByType[i].numUsed; j++) {
1330 int blockId = blockIdList[j];
1331 BasicBlock *chainingBlock =
1332 (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
1333 blockId);
1334
1335 labelList[blockId].lop.generic.offset = (stream - streamMethodStart);
1336
1337 /* Align this chaining cell first */
1338 #if 0
1339 newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
1340 #endif
1341 /* Insert the pseudo chaining instruction */
1342 dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
1343
1344
1345 switch (chainingBlock->blockType) {
1346 case kChainingCellNormal:
1347 handleNormalChainingCell(cUnit,
1348 chainingBlock->startOffset, blockId, labelList);
1349 break;
1350 case kChainingCellInvokeSingleton:
1351 handleInvokeSingletonChainingCell(cUnit,
1352 chainingBlock->containingMethod, blockId, labelList);
1353 break;
1354 case kChainingCellInvokePredicted:
1355 handleInvokePredictedChainingCell(cUnit, blockId);
1356 break;
1357 case kChainingCellHot:
1358 handleHotChainingCell(cUnit,
1359 chainingBlock->startOffset, blockId, labelList);
1360 break;
1361 case kChainingCellBackwardBranch:
1362 handleBackwardBranchChainingCell(cUnit,
1363 chainingBlock->startOffset, blockId, labelList);
1364 break;
1365 default:
1366 ALOGE("Bad blocktype %d", chainingBlock->blockType);
1367 dvmAbort();
1368 break;
1369 }
1370
1371 if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1372 ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
1373 gDvmJit.codeCacheFull = true;
1374 cUnit->baseAddr = NULL;
1375 endOfTrace(true); /* need to free structures */
1376 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1377 return;
1378 }
1379 }
1380 }
1381 #if 0
1382 dvmCompilerApplyGlobalOptimizations(cUnit);
1383 #endif
1384 endOfTrace(false);
1385
1386 if (gDvmJit.codeCacheFull) {
1387 /* We hit the code cache size limit inside endOfTrace(false).
1388 * Bail out for this trace!
1389 */
1390 ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
1391 cUnit->baseAddr = NULL;
1392 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1393 return;
1394 }
1395
1396 /* dump section for chaining cell counts, make sure it is 4-byte aligned */
1397 padding = (4 - ((u4)stream & 3)) & 3;
1398 stream += padding;
1399 ChainCellCounts chainCellCounts;
1400 /* Install the chaining cell counts */
1401 for (i=0; i< kChainingCellGap; i++) {
1402 chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
1403 }
1404 char* streamCountStart = (char*)stream;
1405 memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
1406 stream += sizeof(chainCellCounts);
1407
1408 cUnit->baseAddr = streamMethodStart;
1409 cUnit->totalSize = (stream - streamStart);
1410 if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1411 ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
1412 gDvmJit.codeCacheFull = true;
1413 cUnit->baseAddr = NULL;
1414 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1415 return;
1416 }
1417
1418 /* write chaining cell count offset & chaining cell offset */
1419 u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
1420 *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
1421 pOffset[1] = streamChainingStart - streamMethodStart;
1422
1423 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1424
1425 gDvmJit.codeCacheByteUsed += (stream - streamStart);
1426 if (cUnit->printMe) {
1427 unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
1428 unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
1429 ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
1430 cUnit->method->clazz->descriptor, cUnit->method->name,
1431 codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
1432 ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
1433 cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
1434 printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
1435 }
1436 ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
1437 (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
1438 cUnit->totalSize, gDvmJit.codeCache);
1439
1440 gDvmJit.numCompilations++;
1441
1442 info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
1443 }
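/*
 * Layout of a finished trace as emitted above (sketch):
 *   streamMethodStart - 4: 2-byte chaining-cell-count offset and 2-byte
 *                          chaining-cell offset (reserved via
 *                          EXTRA_BYTES_FOR_CHAINING, filled in last via
 *                          pOffset)
 *   streamMethodStart:     16-byte-aligned code for the basic blocks
 *   streamChainingStart:   chaining cells, grouped by type
 *   streamCountStart:      4-byte-aligned ChainCellCounts
 * cUnit->baseAddr and info->codeAddress point at streamMethodStart, which is
 * the codeAddr that dvmJitUnchain() later walks.
 */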
1444
1445 /*
1446 * Perform translation chain operation.
1447 */
1448 void* dvmJitChain(void* tgtAddr, u4* branchAddr)
1449 {
1450 #ifdef JIT_CHAIN
1451 int relOffset = (int) tgtAddr - (int)branchAddr;
1452
1453 if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
1454 (gDvmJit.codeCacheFull == false)) {
1455
1456 gDvmJit.translationChains++;
1457
1458 //OpndSize immSize = estOpndSizeFromImm(relOffset);
1459 //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
1460 /* The jump operand size is hard-coded to 32 bits. This instruction will replace the "jmp 0" in
1461 * the original code sequence.
1462 */
1463 OpndSize immSize = OpndSize_32;
1464 relOffset -= 5;
1465 //can't use stream here since it is used by the compilation thread
1466 UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1467 dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
1468 PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1469
1470 gDvmJit.hasNewChain = true;
1471
1472 COMPILER_TRACE_CHAINING(
1473 ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
1474 (int) branchAddr, tgtAddr, relOffset));
1475 }
1476 #endif
1477 return tgtAddr;
1478 }
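/*
 * Note: the fixed "relOffset -= 5" accounts for the 5-byte "jmp rel32"
 * (1 opcode byte + 4 displacement bytes) that overwrites the "jmp 0" emitted
 * by insertJumpHelp(); x86 displacements are relative to the end of the
 * instruction. Because insertJumpHelp() aligned the 4-byte displacement, the
 * displacement update falls within a single aligned word, which appears to
 * be the point of that alignment.
 */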
1479
1480 /*
1481 * Accept the work and start compiling. Returns true if compilation
1482 * is attempted.
1483 */
1484 bool dvmCompilerDoWork(CompilerWorkOrder *work)
1485 {
1486 JitTraceDescription *desc;
1487 bool isCompile;
1488 bool success = true;
1489
1490 if (gDvmJit.codeCacheFull) {
1491 return false;
1492 }
1493
1494 switch (work->kind) {
1495 case kWorkOrderTrace:
1496 isCompile = true;
1497 /* Start compilation with maximally allowed trace length */
1498 desc = (JitTraceDescription *)work->info;
1499 success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1500 work->bailPtr, 0 /* no hints */);
1501 break;
1502 case kWorkOrderTraceDebug: {
1503 bool oldPrintMe = gDvmJit.printMe;
1504 gDvmJit.printMe = true;
1505 isCompile = true;
1506 /* Start compilation with maximally allowed trace length */
1507 desc = (JitTraceDescription *)work->info;
1508 success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1509 work->bailPtr, 0 /* no hints */);
1510 gDvmJit.printMe = oldPrintMe;
1511 break;
1512 }
1513 case kWorkOrderProfileMode:
1514 dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
1515 isCompile = false;
1516 break;
1517 default:
1518 isCompile = false;
1519 ALOGE("Jit: unknown work order type");
1520 assert(0); // Bail if debug build, discard otherwise
1521 }
1522 if (!success)
1523 work->result.codeAddress = NULL;
1524 return isCompile;
1525 }
1526
1527 void dvmCompilerCacheFlush(long start, long end, long flags) {
1528 /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
1529 }
1530
1531 //#endif
1532