1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <sys/mman.h>
17 #include "Dalvik.h"
18 #include "libdex/DexOpcodes.h"
19 #include "compiler/Compiler.h"
20 #include "compiler/CompilerIR.h"
21 #include "interp/Jit.h"
22 #include "libdex/DexFile.h"
23 #include "Lower.h"
24 #include "NcgAot.h"
25 #include "compiler/codegen/CompilerCodegen.h"
26
27 /* Init values when a predicted chain is initially assembled */
28 /* E7FE is branch to self */
29 #define PREDICTED_CHAIN_BX_PAIR_INIT 0xe7fe
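/*
 * Note (added annotation): 0xe7fe is the Thumb encoding of a branch-to-self,
 * inherited from the ARM backend. In this IA32 backend the value is only used
 * as a sentinel: it is written into the first word of a freshly assembled
 * predicted chaining cell (see handleInvokePredictedChainingCell) and checked
 * in inlineCachePatchEnqueue to recognize a still-uninitialized cell.
 */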
30
31 /* Target-specific save/restore */
32 extern "C" void dvmJitCalleeSave(double *saveArea);
33 extern "C" void dvmJitCalleeRestore(double *saveArea);
34
35 /*
36 * Determine the initial instruction set to be used for this trace.
37 * Later components may decide to change this.
38 */
39 //JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
40 JitInstructionSetType dvmCompilerInstructionSet(void)
41 {
42 return DALVIK_JIT_IA32;
43 }
44
45 JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
46 {
47 return DALVIK_JIT_IA32;
48 }
49
50 /* we don't use the interpret template for IA32 */
51 void *dvmCompilerGetInterpretTemplate()
52 {
53 return NULL;
54 }
55
56 /* Track the number of times that the code cache is patched */
57 #if defined(WITH_JIT_TUNING)
58 #define UPDATE_CODE_CACHE_PATCHES() (gDvmJit.codeCachePatches++)
59 #else
60 #define UPDATE_CODE_CACHE_PATCHES()
61 #endif
62
63 bool dvmCompilerArchInit() {
64 /* Target-specific configuration */
65 gDvmJit.jitTableSize = 1 << 12;
66 gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
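/*
 * Note (added annotation): the table size is kept a power of two so that
 * jitTableMask (size - 1) can reduce a hashed Dalvik PC to a table index with
 * a single AND; the lookup code itself lives outside this file.
 */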
67 if (gDvmJit.threshold == 0) {
68 gDvmJit.threshold = 255;
69 }
70 if (gDvmJit.codeCacheSize == DEFAULT_CODE_CACHE_SIZE) {
71 gDvmJit.codeCacheSize = 512 * 1024;
72 } else if ((gDvmJit.codeCacheSize == 0) && (gDvm.executionMode == kExecutionModeJit)) {
73 gDvm.executionMode = kExecutionModeInterpFast;
74 }
75 gDvmJit.optLevel = kJitOptLevelO1;
76
77 //Disable Method-JIT
78 gDvmJit.disableOpt |= (1 << kMethodJit);
79
80 #if defined(WITH_SELF_VERIFICATION)
81 /* Force into blocking mode */
82 gDvmJit.blockingMode = true;
83 gDvm.nativeDebuggerActive = true;
84 #endif
85
86 // Make sure all threads have current values
87 dvmJitUpdateThreadStateAll();
88
89 return true;
90 }
91
92 void dvmCompilerPatchInlineCache(void)
93 {
94 int i;
95 PredictedChainingCell *minAddr, *maxAddr;
96
97 /* Nothing to be done */
98 if (gDvmJit.compilerICPatchIndex == 0) return;
99
100 /*
101 * Since all threads are already stopped we don't really need to acquire
102 * the lock. But a race condition could easily be introduced in the future
103 * without paying attention, so we still acquire the lock here.
104 */
105 dvmLockMutex(&gDvmJit.compilerICPatchLock);
106
107 UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
108
109 //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);
110
111 /* Initialize the min/max address range */
112 minAddr = (PredictedChainingCell *)
113 ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
114 maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;
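/*
 * Note (added annotation): minAddr/maxAddr bracket the range of cells patched
 * in the loop below. They are not consumed afterwards in this function; on
 * IA32 no explicit icache flush is needed (see dvmCompilerCacheFlush), so the
 * range is effectively bookkeeping only.
 */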
115
116 for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
117 ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
118 PredictedChainingCell *cellAddr = workOrder->cellAddr;
119 PredictedChainingCell *cellContent = &workOrder->cellContent;
120 ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
121 workOrder->classLoader);
122
123 assert(clazz->serialNumber == workOrder->serialNumber);
124
125 /* Use the newly resolved clazz pointer */
126 cellContent->clazz = clazz;
127
128 if (cellAddr->clazz == NULL) {
129 COMPILER_TRACE_CHAINING(
130 ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
131 cellAddr,
132 cellContent->clazz->descriptor,
133 cellContent->method->name));
134 } else {
135 COMPILER_TRACE_CHAINING(
136 ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
137 "patched",
138 cellAddr,
139 cellAddr->clazz->descriptor,
140 cellContent->clazz->descriptor,
141 cellContent->method->name));
142 }
143
144 /* Patch the chaining cell */
145 *cellAddr = *cellContent;
146 minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
147 maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
148 }
149
150 PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
151
152 gDvmJit.compilerICPatchIndex = 0;
153 dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
154 }
155
156 /* Target-specific cache clearing */
157 void dvmCompilerCacheClear(char *start, size_t size)
158 {
159 /* "0xFF 0xFF" is an invalid opcode for x86. */
160 memset(start, 0xFF, size);
161 }
162
163 /* for JIT debugging, to be implemented */
164 void dvmJitCalleeSave(double *saveArea) {
165 }
166
167 void dvmJitCalleeRestore(double *saveArea) {
168 }
169
170 void dvmJitToInterpSingleStep() {
171 }
172
173 JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
174 const JitEntry *knownEntry) {
175 return NULL;
176 }
177
178 void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
179 {
180 }
181
182 void dvmCompilerArchDump(void)
183 {
184 }
185
186 char *getTraceBase(const JitEntry *p)
187 {
188 return NULL;
189 }
190
191 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
192 {
193 }
194
195 void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
196 {
197 }
198
199 void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
200 {
201 // Method-based JIT not supported for x86.
202 }
203
204 void dvmJitScanAllClassPointers(void (*callback)(void *))
205 {
206 }
207
208 /* Handy function to retrieve the profile count */
209 static inline int getProfileCount(const JitEntry *entry)
210 {
211 if (entry->dPC == 0 || entry->codeAddress == 0)
212 return 0;
213 u4 *pExecutionCount = (u4 *) getTraceBase(entry);
214
215 return pExecutionCount ? *pExecutionCount : 0;
216 }
217
218 /* qsort callback function */
219 static int sortTraceProfileCount(const void *entry1, const void *entry2)
220 {
221 const JitEntry *jitEntry1 = (const JitEntry *)entry1;
222 const JitEntry *jitEntry2 = (const JitEntry *)entry2;
223
224 JitTraceCounter_t count1 = getProfileCount(jitEntry1);
225 JitTraceCounter_t count2 = getProfileCount(jitEntry2);
226 return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
227 }
228
229 /* Sort the trace profile counts and dump them */
230 void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
231 {
232 JitEntry *sortedEntries;
233 int numTraces = 0;
234 unsigned long counts = 0;
235 unsigned int i;
236
237 /* Make sure that the table is not changing */
238 dvmLockMutex(&gDvmJit.tableLock);
239
240 /* Sort the entries in descending order of execution count */
241 sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
242 if (sortedEntries == NULL)
243 goto done;
244 memcpy(sortedEntries, gDvmJit.pJitEntryTable,
245 sizeof(JitEntry) * gDvmJit.jitTableSize);
246 qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
247 sortTraceProfileCount);
248
249 /* Dump the sorted entries */
250 for (i=0; i < gDvmJit.jitTableSize; i++) {
251 if (sortedEntries[i].dPC != 0) {
252 numTraces++;
253 }
254 }
255 if (numTraces == 0)
256 numTraces = 1;
257 ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));
258
259 free(sortedEntries);
260 done:
261 dvmUnlockMutex(&gDvmJit.tableLock);
262 return;
263 }
264
265 void jumpWithRelOffset(char* instAddr, int relOffset) {
266 stream = instAddr;
267 OpndSize immSize = estOpndSizeFromImm(relOffset);
268 relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
269 dump_imm(Mnemonic_JMP, immSize, relOffset);
270 }
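/*
 * Note (added annotation / worked example): x86 relative jumps encode the
 * displacement from the end of the instruction, while the relOffset passed in
 * is measured from its start, hence the subtraction of the instruction size
 * above. With hypothetical values instAddr = 0x1000 and a target of 0x1080,
 * relOffset arrives as 0x80; that does not fit in a signed byte, so the
 * 5-byte "jmp rel32" form is chosen and the encoded displacement becomes
 * 0x80 - 5 = 0x7b.
 */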
271
272 // works whether or not the instructions for the target basic block have been generated
273 LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
274 stream = instAddr;
275 bool unknown;
276 OpndSize size;
277 int relativeNCG = targetId;
278 relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
279 unconditional_jump_int(relativeNCG, size);
280 return NULL;
281 }
282
283 LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
284 stream = instAddr;
285 bool unknown;
286 OpndSize size;
287 int relativeNCG = targetId;
288 relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
289 conditional_jump_int(cc, relativeNCG, size);
290 return NULL;
291 }
292
293 /*
294 * Attempt to enqueue a work order to patch an inline cache for a predicted
295 * chaining cell for virtual/interface calls.
296 */
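/*
 * Note (added annotation): the four cases handled below, in order, are:
 *   1) the cell is still uninitialized (clazz == NULL, branch still holds the
 *      sentinel): patch it in place immediately (the fast path);
 *   2) the incoming clazz differs from the staged one: only record it as the
 *      staged clazz (a frequency filter), do not patch yet;
 *   3) same method but a different clazz: patch just the clazz field, no need
 *      to stop the world;
 *   4) otherwise enqueue a work order so the cell is patched at the next safe
 *      point, or drop the request if the queue is full.
 */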
297 static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
298 PredictedChainingCell *newContent)
299 {
300 bool result = true;
301
302 /*
303 * Make sure only one thread gets here since updating the cell (ie the fast
304 * path) and queueing the request (ie the queued path) have to be done
305 * in an atomic fashion.
306 */
307 dvmLockMutex(&gDvmJit.compilerICPatchLock);
308
309 /* Fast path for uninitialized chaining cell */
310 if (cellAddr->clazz == NULL &&
311 cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
312 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
313
314 cellAddr->method = newContent->method;
315 cellAddr->branch = newContent->branch;
316 cellAddr->branch2 = newContent->branch2;
317
318 /*
319 * The update order matters - make sure clazz is updated last since it
320 * will bring the uninitialized chaining cell to life.
321 */
322 android_atomic_release_store((int32_t)newContent->clazz,
323 (volatile int32_t *)(void*) &cellAddr->clazz);
324 //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
325 UPDATE_CODE_CACHE_PATCHES();
326
327 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
328
329 #if 0
330 MEM_BARRIER();
331 cellAddr->clazz = newContent->clazz;
332 //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
333 #endif
334 #if defined(WITH_JIT_TUNING)
335 gDvmJit.icPatchInit++;
336 #endif
337 COMPILER_TRACE_CHAINING(
338 ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
339 cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
340 /* Check if this is a frequently missed clazz */
341 } else if (cellAddr->stagedClazz != newContent->clazz) {
342 /* Not proven to be frequent yet - build up the filter cache */
343 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
344
345 cellAddr->stagedClazz = newContent->clazz;
346
347 UPDATE_CODE_CACHE_PATCHES();
348 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
349
350 #if defined(WITH_JIT_TUNING)
351 gDvmJit.icPatchRejected++;
352 #endif
353 /*
354 * Different classes but same method implementation - it is safe to just
355 * patch the class value without the need to stop the world.
356 */
357 } else if (cellAddr->method == newContent->method) {
358 UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
359
360 cellAddr->clazz = newContent->clazz;
361 /* No need to flush the cache here since the branch is not patched */
362 UPDATE_CODE_CACHE_PATCHES();
363
364 PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
365
366 #if defined(WITH_JIT_TUNING)
367 gDvmJit.icPatchLockFree++;
368 #endif
369 /*
370 * Cannot patch the chaining cell inline - queue it until the next safe
371 * point.
372 */
373 } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE) {
374 int index = gDvmJit.compilerICPatchIndex++;
375 const ClassObject *clazz = newContent->clazz;
376
377 gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
378 gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
379 gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
380 gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
381 /* For verification purpose only */
382 gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;
383
384 #if defined(WITH_JIT_TUNING)
385 gDvmJit.icPatchQueued++;
386 #endif
387 COMPILER_TRACE_CHAINING(
388 ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
389 cellAddr, newContent->clazz->descriptor, newContent->method->name));
390 } else {
391 /* Queue is full - just drop this patch request */
392 #if defined(WITH_JIT_TUNING)
393 gDvmJit.icPatchDropped++;
394 #endif
395
396 COMPILER_TRACE_CHAINING(
397 ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
398 cellAddr, newContent->clazz->descriptor, newContent->method->name));
399 }
400
401 dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
402 return result;
403 }
404
405 /*
406 * This method is called from the invoke templates for virtual and interface
407 * methods to speculatively set up a chain to the callee. The templates are
408 * written in assembly and have set up method, cell, and clazz in r0, r2, and
409 * r3 respectively, so there is an unused argument in the list. Upon return one
410 * of the following three results may happen:
411 * 1) Chain is not set up because the callee is native. Reset the rechain
412 * count to a big number so that it will take a long time before the next
413 * rechain attempt happens.
414 * 2) Chain is not set up because the callee has not been created yet. Reset
415 * the rechain count to a small number and retry in the near future.
416 * 3) Ask all other threads to stop before patching this chaining cell.
417 * This is required because another thread may have passed the class check
418 * but hasn't reached the chaining cell yet to follow the chain. If we
419 * patch the content before halting the other thread, there is a small
420 * window for a race in which it may follow the new but wrong chain and
421 * invoke a different method.
422 */
423 const Method *dvmJitToPatchPredictedChain(const Method *method,
424 Thread *self,
425 PredictedChainingCell *cell,
426 const ClassObject *clazz)
427 {
428 int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
429 /* Don't come back here for a long time if the method is native */
430 if (dvmIsNativeMethod(method)) {
431 UNPROTECT_CODE_CACHE(cell, sizeof(*cell));
432
433 /*
434 * Put a non-zero/bogus value in the clazz field so that it won't
435 * trigger immediate patching and will continue to fail to match with
436 * a real clazz pointer.
437 */
438 cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;
439
440 UPDATE_CODE_CACHE_PATCHES();
441 PROTECT_CODE_CACHE(cell, sizeof(*cell));
442 COMPILER_TRACE_CHAINING(
443 ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
444 cell, method->name));
445 goto done;
446 }
447 {
448 int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);
449
450 /*
451 * Compilation has not been done yet for the callee. Reset the counter to a
452 * small value and come back to check soon.
453 */
454 if ((tgtAddr == 0) ||
455 ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
456 COMPILER_TRACE_CHAINING(
457 ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
458 cell, method->clazz->descriptor, method->name));
459 goto done;
460 }
461
462 PredictedChainingCell newCell;
463
464 if (cell->clazz == NULL) {
465 newRechainCount = self->icRechainCount;
466 }
467
468 int relOffset = (int) tgtAddr - (int)cell;
469 OpndSize immSize = estOpndSizeFromImm(relOffset);
470 int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
471 relOffset -= jumpSize;
472 COMPILER_TRACE_CHAINING(
473 ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
474 cell, method->clazz->descriptor, method->name, jumpSize));
475 //can't use stream here since it is used by the compilation thread
476 dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch
477
478 newCell.clazz = clazz;
479 newCell.method = method;
480
481 /*
482 * Enqueue the work order; the chaining cell will be patched
483 * the next time a safe point is entered.
484 *
485 * If the enqueuing fails, reset the rechain count to a normal value so that
486 * it won't get indefinitely delayed.
487 */
488 inlineCachePatchEnqueue(cell, &newCell);
489 }
490 done:
491 self->icRechainCount = newRechainCount;
492 return method;
493 }
494
495 /*
496 * Unchain a trace given the starting address of the translation
497 * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR.
498 * For ARM, it returns the address following the last cell unchained.
499 * For IA, it returns NULL since cacheflush is not required for IA.
500 */
501 u4* dvmJitUnchain(void* codeAddr)
502 {
503 /* codeAddr is 4-byte aligned, so is chain cell count offset */
504 u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
505 u2 chainCellCountOffset = *pChainCellCountOffset;
506 /* chain cell counts information is 4-byte aligned */
507 ChainCellCounts *pChainCellCounts =
508 (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
509 u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
510 u2 chainCellOffset = *pChainCellOffset;
511 u1* pChainCells;
512 int i,j;
513 PredictedChainingCell *predChainCell;
514 int padding;
515
516 /* Locate the beginning of the chain cell region */
517 pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);
518
519 /* The cells are sorted in order - walk through them and reset */
520 for (i = 0; i < kChainingCellGap; i++) {
521 /* for hot, normal, singleton chaining:
522 nop //padding.
523 jmp 0
524 mov imm32, reg1
525 mov imm32, reg2
526 call reg2
527 after chaining:
528 nop
529 jmp imm
530 mov imm32, reg1
531 mov imm32, reg2
532 call reg2
533 after unchaining:
534 nop
535 jmp 0
536 mov imm32, reg1
537 mov imm32, reg2
538 call reg2
539 Space occupied by the chaining cell in bytes: the nop is padding so that
540 the 4-byte operand of "jmp 0" is 4-byte aligned.
541 Space for a predicted chaining cell: 5 words = 20 bytes
542 */
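/*
 * Note (added annotation / worked example): for the normal, hot, singleton
 * and backward-branch cells, the code below first rounds pChainCells up to
 * the next 4-byte boundary, which (thanks to the nop padding emitted by
 * insertJumpHelp) is exactly where the 4-byte displacement of the "jmp" sits.
 * Zeroing those 4 bytes turns a chained "jmp rel32" back into "jmp 0", and
 * elemSize = 4+5+5+2 then skips the displacement, the two 5-byte
 * mov-imm32 instructions and the 2-byte "call reg" that follow.
 */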
543 int elemSize = 0;
544 if (i == kChainingCellInvokePredicted) {
545 elemSize = 20;
546 }
547 COMPILER_TRACE_CHAINING(
548 ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));
549
550 for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
551 switch(i) {
552 case kChainingCellNormal:
553 case kChainingCellHot:
554 case kChainingCellInvokeSingleton:
555 case kChainingCellBackwardBranch:
556 COMPILER_TRACE_CHAINING(
557 ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
558 pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
559 elemSize = 4+5+5+2;
560 memset(pChainCells, 0, 4);
561 break;
562 case kChainingCellInvokePredicted:
563 COMPILER_TRACE_CHAINING(
564 ALOGI("Jit Runtime: unchaining of predicted"));
565 /* 4-byte aligned */
566 padding = (4 - ((u4)pChainCells & 3)) & 3;
567 pChainCells += padding;
568 predChainCell = (PredictedChainingCell *) pChainCells;
569 /*
570 * Another mutator thread may be racing to use this particular
571 * predicted cell and may have already passed the clazz
572 * comparison. So we cannot safely wipe the method and branch
573 * fields, but it is safe to clear the clazz, which serves as
574 * the key.
575 */
576 predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
577 break;
578 default:
579 ALOGE("Unexpected chaining type: %d", i);
580 dvmAbort(); // dvmAbort OK here - can't safely recover
581 }
582 COMPILER_TRACE_CHAINING(
583 ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
584 pChainCells += elemSize; /* Advance by a fixed number of bytes */
585 }
586 }
587 return NULL;
588 }
589
590 /* Unchain all translations in the cache. */
591 void dvmJitUnchainAll()
592 {
593 ALOGV("Jit Runtime: unchaining all");
594 if (gDvmJit.pJitEntryTable != NULL) {
595 COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
596 dvmLockMutex(&gDvmJit.tableLock);
597
598 UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
599
600 for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
601 if (gDvmJit.pJitEntryTable[i].dPC &&
602 !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
603 gDvmJit.pJitEntryTable[i].codeAddress) {
604 dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
605 }
606 }
607
608 PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
609
610 dvmUnlockMutex(&gDvmJit.tableLock);
611 gDvmJit.translationChains = 0;
612 }
613 gDvmJit.hasNewChain = false;
614 }
615
616 #define P_GPR_1 PhysicalReg_EBX
617 /* Add an additional jump instruction, keeping the jump target 4-byte aligned. */
618 static void insertJumpHelp()
619 {
620 int rem = (uint)stream % 4;
621 int nop_size = 3 - rem;
622 dump_nop(nop_size);
623 unconditional_jump_int(0, OpndSize_32);
624 return;
625 }
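/*
 * Note (added annotation / worked example): the nop padding above places the
 * opcode byte of the following 5-byte "jmp rel32" at an address with
 * (addr % 4) == 3, so its 4-byte displacement starts on a 4-byte boundary.
 * For example, if stream % 4 == 1, two nops are emitted, the opcode lands at
 * offset 3 (mod 4) and the displacement at offset 0 (mod 4). The bytes that
 * change when the cell is later chained (dvmJitChain) or unchained
 * (dvmJitUnchain) therefore fall within one aligned word, which is presumably
 * what keeps the patching safe against the multithreading issue mentioned in
 * the chaining cell handlers below.
 */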
626
627 /* Chaining cell for code that may need warmup. */
628 /* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
629 blx r0
630 data 0xb23a //bytecode address: 0x5115b23a
631 data 0x5115
632 IA32 assembly:
633 jmp 0 //5 bytes
634 movl address, %ebx
635 movl dvmJitToInterpNormal, %eax
636 call %eax
637 <-- return address
638 */
639 static void handleNormalChainingCell(CompilationUnit *cUnit,
640 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
641 {
642 ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
643 cUnit->method->name, blockId, offset, stream - streamMethodStart);
644 if(dump_x86_inst)
645 ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
646 offset, stream - streamMethodStart, stream);
647 /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
648 * resolve the multithreading issue.
649 */
650 insertJumpHelp();
651 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
652 scratchRegs[0] = PhysicalReg_EAX;
653 call_dvmJitToInterpNormal();
654 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
655 }
656
657 /*
658 * Chaining cell for instructions that immediately follow already translated
659 * code.
660 */
661 static void handleHotChainingCell(CompilationUnit *cUnit,
662 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
663 {
664 ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
665 cUnit->method->name, blockId, offset, stream - streamMethodStart);
666 if(dump_x86_inst)
667 ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
668 offset, stream - streamMethodStart, stream);
669 /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
670 * resolve the multithreading issue.
671 */
672 insertJumpHelp();
673 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
674 scratchRegs[0] = PhysicalReg_EAX;
675 call_dvmJitToInterpTraceSelect();
676 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
677 }
678
679 /* Chaining cell for branches that branch back into the same basic block */
680 static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
681 unsigned int offset, int blockId, LowOpBlockLabel* labelList)
682 {
683 ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
684 cUnit->method->name, blockId, offset, stream - streamMethodStart);
685 if(dump_x86_inst)
686 ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
687 offset, stream - streamMethodStart, stream);
688 /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
689 * resolve the multithreading issue.
690 */
691 insertJumpHelp();
692 move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
693 scratchRegs[0] = PhysicalReg_EAX;
694 call_dvmJitToInterpNormal();
695 //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
696 }
697
698 /* Chaining cell for monomorphic method invocations. */
699 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
700 const Method *callee, int blockId, LowOpBlockLabel* labelList)
701 {
702 ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
703 cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
704 if(dump_x86_inst)
705 ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
706 blockId, stream - streamMethodStart, stream);
707 /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
708 * resolve the multithreading issue.
709 */
710 insertJumpHelp();
711 move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
712 scratchRegs[0] = PhysicalReg_EAX;
713 call_dvmJitToInterpTraceSelect();
714 //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
715 }
716 #undef P_GPR_1
717
718 /* Chaining cell for polymorphic (virtual/interface) method invocations, resolved via predicted chaining. */
719 static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
720 {
721 if(dump_x86_inst)
722 ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
723 blockId, stream - streamMethodStart, stream);
724 #ifndef PREDICTED_CHAINING
725 //assume rPC for callee->insns in %ebx
726 scratchRegs[0] = PhysicalReg_EAX;
727 #if defined(WITH_JIT_TUNING)
728 /* Predicted chaining is not enabled. Fall back to interpreter and
729 * indicate that predicted chaining was not done.
730 */
731 move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
732 #endif
733 call_dvmJitToInterpTraceSelectNoChain();
734 #else
735 /* make sure the section for the predicted chaining cell is 4-byte aligned */
736 //int padding = (4 - ((u4)stream & 3)) & 3;
737 //stream += padding;
738 int* streamData = (int*)stream;
739 /* Should not be executed in the initial state */
740 streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
741 streamData[1] = 0;
742 /* To be filled: class */
743 streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
744 /* To be filled: method */
745 streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
746 /*
747 * Rechain count. The initial value of 0 here will trigger chaining upon
748 * the first invocation of this callsite.
749 */
750 streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
751 #if 0
752 ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
753 *((int*)(stream+8)), *((int*)(stream+12)));
754 #endif
755 stream += 20; //5 *4
756 #endif
757 }
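/*
 * Note (added annotation): the five 4-byte words emitted above correspond, in
 * order, to the sentinel branch value, a second branch word, the class, the
 * method and the rechain counter of the predicted cell (see the
 * PREDICTED_CHAIN_*_INIT constants). That is 20 bytes in total, matching the
 * elemSize used for predicted cells in dvmJitUnchain.
 */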
758
759 /* Load the Dalvik PC into r0 and jump to the specified target */
760 static void handlePCReconstruction(CompilationUnit *cUnit,
761 LowOpBlockLabel *targetLabel)
762 {
763 #if 0
764 LowOp **pcrLabel =
765 (LowOp **) cUnit->pcReconstructionList.elemList;
766 int numElems = cUnit->pcReconstructionList.numUsed;
767 int i;
768 for (i = 0; i < numElems; i++) {
769 dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
770 /* r0 = dalvik PC */
771 loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
772 genUnconditionalBranch(cUnit, targetLabel);
773 }
774 #endif
775 }
776
777 //use O0 code generator for hoisted checks outside of the loop
778 /*
779 * vA = arrayReg;
780 * vB = idxReg;
781 * vC = endConditionReg;
782 * arg[0] = maxC
783 * arg[1] = minC
784 * arg[2] = loopBranchConditionCode
785 */
786 #define P_GPR_1 PhysicalReg_EBX
787 #define P_GPR_2 PhysicalReg_ECX
788 static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
789 {
790 /*
791 * NOTE: these synthesized blocks don't have ssa names assigned
792 * for Dalvik registers. However, because they dominate the following
793 * blocks we can simply use the Dalvik name w/ subscript 0 as the
794 * ssa name.
795 */
796 DecodedInstruction *dInsn = &mir->dalvikInsn;
797 const int maxC = dInsn->arg[0];
798
799 /* assign array in virtual register to P_GPR_1 */
800 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
801 /* assign index in virtual register to P_GPR_2 */
802 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
803 export_pc();
804 compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
805 condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
806 int delta = maxC;
807 /*
808 * If the loop end condition is ">=" instead of ">", then the largest value
809 * of the index is "endCondition - 1".
810 */
811 if (dInsn->arg[2] == OP_IF_GE) {
812 delta--;
813 }
814
815 if (delta < 0) { //+delta
816 //if P_GPR_2 is mapped to a VR, we can't do this
817 alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
818 } else if(delta > 0) {
819 alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
820 }
821 compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
822 condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
823 }
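/*
 * Note (added annotation / worked example): for a hypothetical loop such as
 *   for (i = 0; i < end; i++) { ... a[i + maxC] ... }
 * the code above hoists two checks in front of the loop: a null check on the
 * array register (vA), and a bound check that compares the loop end value
 * (vC), adjusted by maxC (minus one when the loop branch condition is
 * OP_IF_GE), against the array length, branching to the exception block when
 * it is not below the length.
 */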
824
825 /*
826 * vA = arrayReg;
827 * vB = idxReg;
828 * vC = endConditionReg;
829 * arg[0] = maxC
830 * arg[1] = minC
831 * arg[2] = loopBranchConditionCode
832 */
833 static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
834 {
835 DecodedInstruction *dInsn = &mir->dalvikInsn;
836 const int maxC = dInsn->arg[0];
837
838 /* assign array in virtual register to P_GPR_1 */
839 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
840 /* assign index in virtual register to P_GPR_2 */
841 get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
842 export_pc();
843 compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
844 condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
845
846 if (maxC < 0) {
847 //if P_GPR_2 is mapped to a VR, we can't do this
848 alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
849 } else if(maxC > 0) {
850 alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
851 }
852 compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
853 condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
854
855 }
856 #undef P_GPR_1
857 #undef P_GPR_2
858
859 /*
860 * vA = idxReg;
861 * vB = minC;
862 */
863 #define P_GPR_1 PhysicalReg_ECX
864 static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
865 {
866 DecodedInstruction *dInsn = &mir->dalvikInsn;
867 const int minC = dInsn->vB;
868 get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index register
869 export_pc();
870 compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
871 condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
872 }
873 #undef P_GPR_1
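/*
 * Note (added annotation / worked example): minC appears to be the smallest
 * (most negative) constant offset added to the index inside the loop. If the
 * loop body reads a hypothetical a[i - 3], minC is -3 and the check above
 * compares the index register against 3, branching to the exception block
 * when the index is below it, so every access a[i + k] with k >= minC stays
 * at a non-negative index.
 */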
874
875 #ifdef WITH_JIT_INLINING
876 static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
877 {
878 CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
879 if(gDvm.executionMode == kExecutionModeNcgO0) {
880 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
881 move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
882 compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
883 export_pc(); //use %edx
884 conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
885 move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
886 compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
887 } else {
888 get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
889 move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
890 nullCheck(5, false, 1, mir->dalvikInsn.vC);
891 move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
892 compare_reg_reg(4, false, 6, false);
893 }
894
895 //immediate will be updated later in genLandingPadForMispredictedCallee
896 streamMisPred = stream;
897 callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
898 }
899 #endif
900
901 /* Extended MIR instructions like PHI */
902 void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
903 {
904 ExecutionMode origMode = gDvm.executionMode;
905 gDvm.executionMode = kExecutionModeNcgO0;
906 switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
907 case kMirOpPhi: {
908 break;
909 }
910 case kMirOpNullNRangeUpCheck: {
911 genHoistedChecksForCountUpLoop(cUnit, mir);
912 break;
913 }
914 case kMirOpNullNRangeDownCheck: {
915 genHoistedChecksForCountDownLoop(cUnit, mir);
916 break;
917 }
918 case kMirOpLowerBound: {
919 genHoistedLowerBoundCheck(cUnit, mir);
920 break;
921 }
922 case kMirOpPunt: {
923 break;
924 }
925 #ifdef WITH_JIT_INLINING
926 case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
927 genValidationForPredictedInline(cUnit, mir);
928 break;
929 }
930 #endif
931 default:
932 break;
933 }
934 gDvm.executionMode = origMode;
935 }
936
937 static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
938 int bodyId)
939 {
940 /*
941 * Next, create two branches - one branch over to the loop body and the
942 * other branch to the PCR cell to punt.
943 */
944 //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
945 //setupResourceMasks(branchToBody);
946 //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);
947
948 #if 0
949 LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
950 branchToPCR->opCode = kThumbBUncond;
951 branchToPCR->generic.target = (LIR *) pcrLabel;
952 setupResourceMasks(branchToPCR);
953 cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
954 #endif
955 }
956
957 /* check whether we can merge a block ending with "goto" into its taken (target) block */
958 bool mergeBlock(BasicBlock *bb) {
959 if(bb->blockType == kDalvikByteCode &&
960 bb->firstMIRInsn != NULL &&
961 (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
962 bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
963 bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
964 bb->fallThrough == NULL) {// &&
965 //cUnit->hasLoop) {
966 //ALOGI("merge blocks ending with goto at index %d", i);
967 MIR* prevInsn = bb->lastMIRInsn->prev;
968 if(bb->taken == NULL) return false;
969 MIR* mergeInsn = bb->taken->firstMIRInsn;
970 if(mergeInsn == NULL) return false;
971 if(prevInsn == NULL) {//the block has a single instruction
972 bb->firstMIRInsn = mergeInsn;
973 } else {
974 prevInsn->next = mergeInsn; //remove goto from the chain
975 }
976 mergeInsn->prev = prevInsn;
977 bb->lastMIRInsn = bb->taken->lastMIRInsn;
978 bb->taken->firstMIRInsn = NULL; //block being merged in
979 bb->fallThrough = bb->taken->fallThrough;
980 bb->taken = bb->taken->taken;
981 return true;
982 }
983 return false;
984 }
985
986 static int genTraceProfileEntry(CompilationUnit *cUnit)
987 {
988 cUnit->headerSize = 6;
989 if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
990 (gDvmJit.profileMode == kTraceProfilingDisabled)) {
991 return 12;
992 } else {
993 return 4;
994 }
995
996 }
997
998 #define PRINT_BUFFER_LEN 1024
999 /* Print the code block in the code cache in the range [startAddr, endAddr)
1000 * in readable format.
1001 */
1002 void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
1003 {
1004 char strbuf[PRINT_BUFFER_LEN];
1005 unsigned char *addr;
1006 unsigned char *next_addr;
1007 int n;
1008
1009 if (gDvmJit.printBinary) {
1010 // print binary in bytes
1011 n = 0;
1012 for (addr = startAddr; addr < endAddr; addr++) {
1013 n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
1014 if (n > PRINT_BUFFER_LEN - 10) {
1015 ALOGD("## %s", strbuf);
1016 n = 0;
1017 }
1018 }
1019 if (n > 0)
1020 ALOGD("## %s", strbuf);
1021 }
1022
1023 // print disassembled instructions
1024 addr = startAddr;
1025 while (addr < endAddr) {
1026 next_addr = reinterpret_cast<unsigned char*>
1027 (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
1028 strbuf, PRINT_BUFFER_LEN));
1029 if (addr != next_addr) {
1030 ALOGD("** %p: %s", addr, strbuf);
1031 } else { // check whether this is nop padding
1032 if (addr[0] == 0x90) {
1033 ALOGD("** %p: NOP (1 byte)", addr);
1034 next_addr += 1;
1035 } else if (addr[0] == 0x66 && addr[1] == 0x90) {
1036 ALOGD("** %p: NOP (2 bytes)", addr);
1037 next_addr += 2;
1038 } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
1039 ALOGD("** %p: NOP (3 bytes)", addr);
1040 next_addr += 3;
1041 } else {
1042 ALOGD("** unable to decode binary at %p", addr);
1043 break;
1044 }
1045 }
1046 addr = next_addr;
1047 }
1048 }
1049
1050 /* 4 is the number of additional bytes needed for chaining information for trace:
1051 * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */
1052 #define EXTRA_BYTES_FOR_CHAINING 4
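/*
 * Note (added annotation): resulting layout of a translation in the code
 * cache, as written by dvmCompilerMIR2LIR below and read back by
 * dvmJitUnchain above. The 4 bytes immediately before codeAddr hold two u2
 * values: at codeAddr - 4 the offset from codeAddr to the ChainCellCounts
 * block, and at codeAddr - 2 the offset to the first chaining cell. codeAddr
 * itself is 16-byte aligned and is followed by the trace code, the chaining
 * cells, and finally the 4-byte-aligned ChainCellCounts.
 */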
1053
1054 /* Entry function to invoke the backend of the JIT compiler */
1055 void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
1056 {
1057 dump_x86_inst = cUnit->printMe;
1058 /* Used to hold the labels of each block */
1059 LowOpBlockLabel *labelList =
1060 (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
1061 LowOp *headLIR = NULL;
1062 GrowableList chainingListByType[kChainingCellLast];
1063 unsigned int i, padding;
1064
1065 /*
1066 * Initialize the chaining cell lists, one per chaining cell type.
1067 */
1068 for (i = 0; i < kChainingCellLast; i++) {
1069 dvmInitGrowableList(&chainingListByType[i], 2);
1070 }
1071
1072 /* Clear the visited flag for each block */
1073 dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
1074 kAllNodes, false /* isIterative */);
1075
1076 GrowableListIterator iterator;
1077 dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
1078
1079 /* Traces start with a profiling entry point. Generate it here */
1080 cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
1081
1082 //BasicBlock **blockList = cUnit->blockList;
1083 GrowableList *blockList = &cUnit->blockList;
1084 BasicBlock *bb;
1085
1086 info->codeAddress = NULL;
1087 stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
1088 streamStart = stream; /* trace start before alignment */
1089
1090 // TODO: compile into a temporary buffer and then copy into the code cache.
1091 // That would let us leave the code cache unprotected for a shorter time.
1092 size_t unprotected_code_cache_bytes =
1093 gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed;
1094 UNPROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1095
1096 stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
1097 stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
1098 streamMethodStart = stream; /* code start */
1099 for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
1100 labelList[i].lop.generic.offset = -1;
1101 }
1102 cUnit->exceptionBlockId = -1;
1103 for (i = 0; i < blockList->numUsed; i++) {
1104 bb = (BasicBlock *) blockList->elemList[i];
1105 if(bb->blockType == kExceptionHandling)
1106 cUnit->exceptionBlockId = i;
1107 }
1108 startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
1109 if(gDvm.executionMode == kExecutionModeNcgO1) {
1110 //merge blocks ending with "goto" with the fall through block
1111 if (cUnit->jitMode != kJitLoop)
1112 for (i = 0; i < blockList->numUsed; i++) {
1113 bb = (BasicBlock *) blockList->elemList[i];
1114 bool merged = mergeBlock(bb);
1115 while(merged) merged = mergeBlock(bb);
1116 }
1117 for (i = 0; i < blockList->numUsed; i++) {
1118 bb = (BasicBlock *) blockList->elemList[i];
1119 if(bb->blockType == kDalvikByteCode &&
1120 bb->firstMIRInsn != NULL) {
1121 preprocessingBB(bb);
1122 }
1123 }
1124 preprocessingTrace();
1125 }
1126
1127 /* Handle the content in each basic block */
1128 for (i = 0; ; i++) {
1129 MIR *mir;
1130 bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
1131 if (bb == NULL) break;
1132 if (bb->visited == true) continue;
1133
1134 labelList[i].immOpnd.value = bb->startOffset;
1135
1136 if (bb->blockType >= kChainingCellLast) {
1137 /*
1138 * Append the label pseudo LIR first. Chaining cells will be handled
1139 * separately afterwards.
1140 */
1141 dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
1142 }
1143
1144 if (bb->blockType == kEntryBlock) {
1145 labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
1146 if (bb->firstMIRInsn == NULL) {
1147 continue;
1148 } else {
1149 setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
1150 //&labelList[blockList[i]->fallThrough->id]);
1151 }
1152 } else if (bb->blockType == kExitBlock) {
1153 labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
1154 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1155 goto gen_fallthrough;
1156 } else if (bb->blockType == kDalvikByteCode) {
1157 if (bb->hidden == true) continue;
1158 labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
1159 /* Reset the register state */
1160 #if 0
1161 resetRegisterScoreboard(cUnit);
1162 #endif
1163 } else {
1164 switch (bb->blockType) {
1165 case kChainingCellNormal:
1166 labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
1167 /* handle the codegen later */
1168 dvmInsertGrowableList(
1169 &chainingListByType[kChainingCellNormal], i);
1170 break;
1171 case kChainingCellInvokeSingleton:
1172 labelList[i].lop.opCode2 =
1173 ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
1174 labelList[i].immOpnd.value =
1175 (int) bb->containingMethod;
1176 /* handle the codegen later */
1177 dvmInsertGrowableList(
1178 &chainingListByType[kChainingCellInvokeSingleton], i);
1179 break;
1180 case kChainingCellInvokePredicted:
1181 labelList[i].lop.opCode2 =
1182 ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
1183 /*
1184 * Move the cached method pointer from operand 1 to 0.
1185 * Operand 0 was clobbered earlier in this routine to store
1186 * the block starting offset, which is not applicable to
1187 * predicted chaining cell.
1188 */
1189 //TODO
1190 //labelList[i].operands[0] = labelList[i].operands[1];
1191
1192 /* handle the codegen later */
1193 dvmInsertGrowableList(
1194 &chainingListByType[kChainingCellInvokePredicted], i);
1195 break;
1196 case kChainingCellHot:
1197 labelList[i].lop.opCode2 =
1198 ATOM_PSEUDO_CHAINING_CELL_HOT;
1199 /* handle the codegen later */
1200 dvmInsertGrowableList(
1201 &chainingListByType[kChainingCellHot], i);
1202 break;
1203 case kPCReconstruction:
1204 /* Make sure exception handling block is next */
1205 labelList[i].lop.opCode2 =
1206 ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
1207 //assert (i == cUnit->numBlocks - 2);
1208 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1209 handlePCReconstruction(cUnit,
1210 &labelList[cUnit->puntBlock->id]);
1211 break;
1212 case kExceptionHandling:
1213 labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
1214 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1215 //if (cUnit->pcReconstructionList.numUsed) {
1216 scratchRegs[0] = PhysicalReg_EAX;
1217 jumpToInterpPunt();
1218 //call_dvmJitToInterpPunt();
1219 //}
1220 break;
1221 case kChainingCellBackwardBranch:
1222 labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
1223 /* handle the codegen later */
1224 dvmInsertGrowableList(
1225 &chainingListByType[kChainingCellBackwardBranch],
1226 i);
1227 break;
1228 default:
1229 break;
1230 }
1231 continue;
1232 }
1233 {
1234 //LowOp *headLIR = NULL;
1235 const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
1236 const u2 *startCodePtr = dexCode->insns;
1237 const u2 *codePtr;
1238 labelList[i].lop.generic.offset = (stream - streamMethodStart);
1239 ALOGV("get ready to handle JIT bb %d type %d hidden %d",
1240 bb->id, bb->blockType, bb->hidden);
1241 for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
1242 bb = nextBB;
1243 bb->visited = true;
1244 cUnit->nextCodegenBlock = NULL;
1245
1246 if(gDvm.executionMode == kExecutionModeNcgO1 &&
1247 bb->blockType != kEntryBlock &&
1248 bb->firstMIRInsn != NULL) {
1249 startOfBasicBlock(bb);
1250 int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
1251 endOfBasicBlock(bb);
1252 if(cg_ret < 0) {
1253 endOfTrace(true/*freeOnly*/);
1254 cUnit->baseAddr = NULL;
1255 PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1256 return;
1257 }
1258 } else {
1259 for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
1260 startOfBasicBlock(bb); //why here for O0
1261 Opcode dalvikOpCode = mir->dalvikInsn.opcode;
1262 if((int)dalvikOpCode >= (int)kMirOpFirst) {
1263 handleExtendedMIR(cUnit, mir);
1264 continue;
1265 }
1266 InstructionFormat dalvikFormat =
1267 dexGetFormatFromOpcode(dalvikOpCode);
1268 ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
1269 mir->offset, dalvikOpCode, dalvikFormat);
1270 LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
1271 /* Remember the first LIR for this block */
1272 if (headLIR == NULL) {
1273 headLIR = (LowOp*)boundaryLIR;
1274 }
1275 bool notHandled = true;
1276 /*
1277 * Debugging: screen the opcode first to see if it is in the
1278 * do[-not]-compile list
1279 */
1280 bool singleStepMe =
1281 gDvmJit.includeSelectedOp !=
1282 ((gDvmJit.opList[dalvikOpCode >> 3] &
1283 (1 << (dalvikOpCode & 0x7))) !=
1284 0);
1285 if (singleStepMe || cUnit->allSingleStep) {
1286 } else {
1287 codePtr = startCodePtr + mir->offset;
1288 //lower each byte code, update LIR
1289 notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
1290 if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
1291 CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1292 ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
1293 gDvmJit.codeCacheFull = true;
1294 cUnit->baseAddr = NULL;
1295 endOfTrace(true/*freeOnly*/);
1296 PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1297 return;
1298 }
1299 }
1300 if (notHandled) {
1301 ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
1302 mir->offset,
1303 dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
1304 dalvikFormat);
1305 dvmAbort();
1306 break;
1307 }
1308 } // end for
1309 } // end else //JIT + O0 code generator
1310 }
1311 } // end for
1312 /* Eliminate redundant loads/stores and delay stores into later slots */
1313 #if 0
1314 dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
1315 cUnit->lastLIRInsn);
1316 #endif
1317 if (headLIR) headLIR = NULL;
1318 gen_fallthrough:
1319 /*
1320 * Check if the block is terminated due to trace length constraint -
1321 * insert an unconditional branch to the chaining cell.
1322 */
1323 if (bb->needFallThroughBranch) {
1324 jumpToBasicBlock(stream, bb->fallThrough->id);
1325 }
1326
1327 }
1328
1329 char* streamChainingStart = (char*)stream;
1330 /* Handle the chaining cells in predefined order */
1331 for (i = 0; i < kChainingCellGap; i++) {
1332 size_t j;
1333 int *blockIdList = (int *) chainingListByType[i].elemList;
1334
1335 cUnit->numChainingCells[i] = chainingListByType[i].numUsed;
1336
1337 /* No chaining cells of this type */
1338 if (cUnit->numChainingCells[i] == 0)
1339 continue;
1340
1341 /* Record the first LIR for a new type of chaining cell */
1342 cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
1343 for (j = 0; j < chainingListByType[i].numUsed; j++) {
1344 int blockId = blockIdList[j];
1345 BasicBlock *chainingBlock =
1346 (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
1347 blockId);
1348
1349 labelList[blockId].lop.generic.offset = (stream - streamMethodStart);
1350
1351 /* Align this chaining cell first */
1352 #if 0
1353 newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
1354 #endif
1355 /* Insert the pseudo chaining instruction */
1356 dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
1357
1358
1359 switch (chainingBlock->blockType) {
1360 case kChainingCellNormal:
1361 handleNormalChainingCell(cUnit,
1362 chainingBlock->startOffset, blockId, labelList);
1363 break;
1364 case kChainingCellInvokeSingleton:
1365 handleInvokeSingletonChainingCell(cUnit,
1366 chainingBlock->containingMethod, blockId, labelList);
1367 break;
1368 case kChainingCellInvokePredicted:
1369 handleInvokePredictedChainingCell(cUnit, blockId);
1370 break;
1371 case kChainingCellHot:
1372 handleHotChainingCell(cUnit,
1373 chainingBlock->startOffset, blockId, labelList);
1374 break;
1375 case kChainingCellBackwardBranch:
1376 handleBackwardBranchChainingCell(cUnit,
1377 chainingBlock->startOffset, blockId, labelList);
1378 break;
1379 default:
1380 ALOGE("Bad blocktype %d", chainingBlock->blockType);
1381 dvmAbort();
1382 break;
1383 }
1384
1385 if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1386 ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
1387 gDvmJit.codeCacheFull = true;
1388 cUnit->baseAddr = NULL;
1389 endOfTrace(true); /* need to free structures */
1390 PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1391 return;
1392 }
1393 }
1394 }
1395 #if 0
1396 dvmCompilerApplyGlobalOptimizations(cUnit);
1397 #endif
1398 endOfTrace(false);
1399
1400 if (gDvmJit.codeCacheFull) {
1401 /* We hit the code cache size limit inside endOfTrace(false).
1402 * Bail out for this trace!
1403 */
1404 ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
1405 cUnit->baseAddr = NULL;
1406 PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1407 return;
1408 }
1409
1410 /* dump section for chaining cell counts, make sure it is 4-byte aligned */
1411 padding = (4 - ((u4)stream & 3)) & 3;
1412 stream += padding;
1413 ChainCellCounts chainCellCounts;
1414 /* Install the chaining cell counts */
1415 for (i=0; i< kChainingCellGap; i++) {
1416 chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
1417 }
1418 char* streamCountStart = (char*)stream;
1419 memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
1420 stream += sizeof(chainCellCounts);
1421
1422 cUnit->baseAddr = streamMethodStart;
1423 cUnit->totalSize = (stream - streamStart);
1424 if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1425 ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
1426 gDvmJit.codeCacheFull = true;
1427 cUnit->baseAddr = NULL;
1428 PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1429 return;
1430 }
1431
1432 /* write chaining cell count offset & chaining cell offset */
1433 u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
1434 *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
1435 pOffset[1] = streamChainingStart - streamMethodStart;
1436
1437 PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1438
1439 gDvmJit.codeCacheByteUsed += (stream - streamStart);
1440 if (cUnit->printMe) {
1441 unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
1442 unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
1443 ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
1444 cUnit->method->clazz->descriptor, cUnit->method->name,
1445 codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
1446 ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
1447 cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
1448 printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
1449 }
1450 ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
1451 (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
1452 cUnit->totalSize, gDvmJit.codeCache);
1453
1454 gDvmJit.numCompilations++;
1455
1456 info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
1457 }
1458
1459 /*
1460 * Perform the translation chaining operation.
1461 */
1462 void* dvmJitChain(void* tgtAddr, u4* branchAddr)
1463 {
1464 #ifdef JIT_CHAIN
1465 int relOffset = (int) tgtAddr - (int)branchAddr;
1466
1467 if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
1468 (gDvmJit.codeCacheFull == false)) {
1469
1470 gDvmJit.translationChains++;
1471
1472 //OpndSize immSize = estOpndSizeFromImm(relOffset);
1473 //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
1474 /* Hard-code the jump operand size to 32 bits. This instruction will replace the "jump 0" in
1475 * the original code sequence.
1476 */
1477 OpndSize immSize = OpndSize_32;
1478 relOffset -= 5;
1479 //can't use stream here since it is used by the compilation thread
1480 UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1481 dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
1482 PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1483
1484 gDvmJit.hasNewChain = true;
1485
1486 COMPILER_TRACE_CHAINING(
1487 ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
1488 (int) branchAddr, tgtAddr, relOffset));
1489 }
1490 #endif
1491 return tgtAddr;
1492 }
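/*
 * Note (added annotation / worked example): the chained instruction is a
 * "jmp rel32" (1 opcode byte + 4 displacement bytes = 5 bytes) whose
 * displacement is relative to the end of the instruction. With hypothetical
 * addresses branchAddr = 0x2000 and tgtAddr = 0x2100, relOffset starts at
 * 0x100 and the encoded displacement becomes 0x100 - 5 = 0xfb, so execution
 * resumes at 0x2000 + 5 + 0xfb = 0x2100.
 */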
1493
1494 /*
1495 * Accept the work and start compiling. Returns true if compilation
1496 * is attempted.
1497 */
1498 bool dvmCompilerDoWork(CompilerWorkOrder *work)
1499 {
1500 JitTraceDescription *desc;
1501 bool isCompile;
1502 bool success = true;
1503
1504 if (gDvmJit.codeCacheFull) {
1505 return false;
1506 }
1507
1508 switch (work->kind) {
1509 case kWorkOrderTrace:
1510 isCompile = true;
1511 /* Start compilation with maximally allowed trace length */
1512 desc = (JitTraceDescription *)work->info;
1513 success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1514 work->bailPtr, 0 /* no hints */);
1515 break;
1516 case kWorkOrderTraceDebug: {
1517 bool oldPrintMe = gDvmJit.printMe;
1518 gDvmJit.printMe = true;
1519 isCompile = true;
1520 /* Start compilation with maximally allowed trace length */
1521 desc = (JitTraceDescription *)work->info;
1522 success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1523 work->bailPtr, 0 /* no hints */);
1524 gDvmJit.printMe = oldPrintMe;
1525 break;
1526 }
1527 case kWorkOrderProfileMode:
1528 dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
1529 isCompile = false;
1530 break;
1531 default:
1532 isCompile = false;
1533 ALOGE("Jit: unknown work order type");
1534 assert(0); // Bail if debug build, discard otherwise
1535 }
1536 if (!success)
1537 work->result.codeAddress = NULL;
1538 return isCompile;
1539 }
1540
1541 void dvmCompilerCacheFlush(long start, long end, long flags) {
1542 /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
1543 }
1544
1545 //#endif
1546