1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <sys/mman.h>
17 #include "Dalvik.h"
18 #include "libdex/DexOpcodes.h"
19 #include "compiler/Compiler.h"
20 #include "compiler/CompilerIR.h"
21 #include "interp/Jit.h"
22 #include "libdex/DexFile.h"
23 #include "Lower.h"
24 #include "NcgAot.h"
25 #include "compiler/codegen/CompilerCodegen.h"
26 
27 /* Init values when a predicted chain is initially assembled */
28 /* E7FE is branch to self */
29 #define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe
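/* Note: 0xE7FE is the ARM/Thumb branch-to-self encoding inherited from the ARM
 * port; on IA32 it serves as the "uninitialized" marker that
 * inlineCachePatchEnqueue() checks (cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT)
 * rather than as executable code. */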
30 
31 /* Target-specific save/restore */
32 extern "C" void dvmJitCalleeSave(double *saveArea);
33 extern "C" void dvmJitCalleeRestore(double *saveArea);
34 
35 /*
36  * Determine the initial instruction set to be used for this trace.
37  * Later components may decide to change this.
38  */
39 //JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
40 JitInstructionSetType dvmCompilerInstructionSet(void)
41 {
42     return DALVIK_JIT_IA32;
43 }
44 
45 JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
46 {
47     return DALVIK_JIT_IA32;
48 }
49 
50 /* we don't use a template for IA32 */
51 void *dvmCompilerGetInterpretTemplate()
52 {
53       return NULL;
54 }
55 
56 /* Track the number of times that the code cache is patched */
57 #if defined(WITH_JIT_TUNING)
58 #define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
59 #else
60 #define UPDATE_CODE_CACHE_PATCHES()
61 #endif
62 
63 bool dvmCompilerArchInit() {
64     /* Target-specific configuration */
65     gDvmJit.jitTableSize = 1 << 12;
66     gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
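    /* jitTableSize must remain a power of two so that jitTableMask
     * (size - 1) can be used as an index mask into the JIT lookup table. */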
67     if (gDvmJit.threshold == 0) {
68         gDvmJit.threshold = 255;
69     }
70     if (gDvmJit.codeCacheSize == DEFAULT_CODE_CACHE_SIZE) {
71       gDvmJit.codeCacheSize = 512 * 1024;
72     } else if ((gDvmJit.codeCacheSize == 0) && (gDvm.executionMode == kExecutionModeJit)) {
73       gDvm.executionMode = kExecutionModeInterpFast;
74     }
75     gDvmJit.optLevel = kJitOptLevelO1;
76 
77     //Disable Method-JIT
78     gDvmJit.disableOpt |= (1 << kMethodJit);
79 
80 #if defined(WITH_SELF_VERIFICATION)
81     /* Force into blocking mode */
82     gDvmJit.blockingMode = true;
83     gDvm.nativeDebuggerActive = true;
84 #endif
85 
86     // Make sure all threads have current values
87     dvmJitUpdateThreadStateAll();
88 
89     return true;
90 }
91 
92 void dvmCompilerPatchInlineCache(void)
93 {
94     int i;
95     PredictedChainingCell *minAddr, *maxAddr;
96 
97     /* Nothing to be done */
98     if (gDvmJit.compilerICPatchIndex == 0) return;
99 
100     /*
101      * Since all threads are already stopped we don't really need to acquire
102  * the lock. But a race condition could easily be introduced in the future
103  * without careful attention, so we still acquire the lock here.
104      */
105     dvmLockMutex(&gDvmJit.compilerICPatchLock);
106 
107     UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
108 
109     //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);
110 
111     /* Initialize the min/max address range */
112     minAddr = (PredictedChainingCell *)
113         ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
114     maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;
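    /* The patched address range is tracked so that targets needing an explicit
     * icache flush could flush [minAddr, maxAddr]; IA32 requires no flush, so
     * the range is computed here but not otherwise used. */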
115 
116     for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
117         ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
118         PredictedChainingCell *cellAddr = workOrder->cellAddr;
119         PredictedChainingCell *cellContent = &workOrder->cellContent;
120         ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
121                                                 workOrder->classLoader);
122 
123         assert(clazz->serialNumber == workOrder->serialNumber);
124 
125         /* Use the newly resolved clazz pointer */
126         cellContent->clazz = clazz;
127 
128         if (cellAddr->clazz == NULL) {
129             COMPILER_TRACE_CHAINING(
130                 ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
131                       cellAddr,
132                       cellContent->clazz->descriptor,
133                       cellContent->method->name));
134         } else {
135             COMPILER_TRACE_CHAINING(
136                 ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
137                       "patched",
138                       cellAddr,
139                       cellAddr->clazz->descriptor,
140                       cellContent->clazz->descriptor,
141                       cellContent->method->name));
142         }
143 
144         /* Patch the chaining cell */
145         *cellAddr = *cellContent;
146         minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
147         maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
148     }
149 
150     PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
151 
152     gDvmJit.compilerICPatchIndex = 0;
153     dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
154 }
155 
156 /* Target-specific cache clearing */
157 void dvmCompilerCacheClear(char *start, size_t size)
158 {
159     /* "0xFF 0xFF" is an invalid opcode for x86. */
160     memset(start, 0xFF, size);
161 }
162 
163 /* for JIT debugging, to be implemented */
164 void dvmJitCalleeSave(double *saveArea) {
165 }
166 
167 void dvmJitCalleeRestore(double *saveArea) {
168 }
169 
170 void dvmJitToInterpSingleStep() {
171 }
172 
173 JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
174                                             const JitEntry *knownEntry) {
175     return NULL;
176 }
177 
178 void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
179 {
180 }
181 
182 void dvmCompilerArchDump(void)
183 {
184 }
185 
186 char *getTraceBase(const JitEntry *p)
187 {
188     return NULL;
189 }
190 
191 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
192 {
193 }
194 
195 void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
196 {
197 }
198 
199 void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
200 {
201     // Method-based JIT not supported for x86.
202 }
203 
204 void dvmJitScanAllClassPointers(void (*callback)(void *))
205 {
206 }
207 
208 /* Handy function to retrieve the profile count */
209 static inline int getProfileCount(const JitEntry *entry)
210 {
211     if (entry->dPC == 0 || entry->codeAddress == 0)
212         return 0;
213     u4 *pExecutionCount = (u4 *) getTraceBase(entry);
214 
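    /* Note: getTraceBase() is stubbed to return NULL on IA32, so this currently
     * always yields 0. */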
215     return pExecutionCount ? *pExecutionCount : 0;
216 }
217 
218 /* qsort callback function */
219 static int sortTraceProfileCount(const void *entry1, const void *entry2)
220 {
221     const JitEntry *jitEntry1 = (const JitEntry *)entry1;
222     const JitEntry *jitEntry2 = (const JitEntry *)entry2;
223 
224     JitTraceCounter_t count1 = getProfileCount(jitEntry1);
225     JitTraceCounter_t count2 = getProfileCount(jitEntry2);
226     return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
227 }
228 
229 /* Sort the trace profile counts and dump them */
230 void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
231 {
232     JitEntry *sortedEntries;
233     int numTraces = 0;
234     unsigned long counts = 0;
235     unsigned int i;
236 
237     /* Make sure that the table is not changing */
238     dvmLockMutex(&gDvmJit.tableLock);
239 
240     /* Sort the entries by descending order */
241     sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
242     if (sortedEntries == NULL)
243         goto done;
244     memcpy(sortedEntries, gDvmJit.pJitEntryTable,
245            sizeof(JitEntry) * gDvmJit.jitTableSize);
246     qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
247           sortTraceProfileCount);
248 
249     /* Dump the sorted entries */
250     for (i=0; i < gDvmJit.jitTableSize; i++) {
251         if (sortedEntries[i].dPC != 0) {
252             numTraces++; counts += getProfileCount(&sortedEntries[i]);
253         }
254     }
255     if (numTraces == 0)
256         numTraces = 1;
257     ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));
258 
259     free(sortedEntries);
260 done:
261     dvmUnlockMutex(&gDvmJit.tableLock);
262     return;
263 }
264 
265 void jumpWithRelOffset(char* instAddr, int relOffset) {
266     stream = instAddr;
267     OpndSize immSize = estOpndSizeFromImm(relOffset);
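    /* x86 encodes jump displacements relative to the end of the jump instruction,
     * so convert relOffset (given relative to the start of the instruction) by
     * subtracting the instruction size. */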
268     relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
269     dump_imm(Mnemonic_JMP, immSize, relOffset);
270 }
271 
272 // works whether instructions for target basic block are generated or not
273 LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
274     stream = instAddr;
275     bool unknown;
276     OpndSize size;
277     int relativeNCG = targetId;
278     relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
279     unconditional_jump_int(relativeNCG, size);
280     return NULL;
281 }
282 
283 LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
284     stream = instAddr;
285     bool unknown;
286     OpndSize size;
287     int relativeNCG = targetId;
288     relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
289     conditional_jump_int(cc, relativeNCG, size);
290     return NULL;
291 }
292 
293 /*
294  * Attempt to enqueue a work order to patch an inline cache for a predicted
295  * chaining cell for virtual/interface calls.
296  */
297 static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
298                                     PredictedChainingCell *newContent)
299 {
300     bool result = true;
301 
302     /*
303      * Make sure only one thread gets here, since updating the cell (ie the
304      * fast path) and queueing the request (ie the queued path) have to be
305      * done in an atomic fashion.
306      */
307     dvmLockMutex(&gDvmJit.compilerICPatchLock);
308 
309     /* Fast path for uninitialized chaining cell */
310     if (cellAddr->clazz == NULL &&
311         cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
312         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
313 
314         cellAddr->method = newContent->method;
315         cellAddr->branch = newContent->branch;
316         cellAddr->branch2 = newContent->branch2;
317 
318         /*
319          * The update order matters - make sure clazz is updated last since it
320          * will bring the uninitialized chaining cell to life.
321          */
322         android_atomic_release_store((int32_t)newContent->clazz,
323             (volatile int32_t *)(void*) &cellAddr->clazz);
324         //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
325         UPDATE_CODE_CACHE_PATCHES();
326 
327         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
328 
329 #if 0
330         MEM_BARRIER();
331         cellAddr->clazz = newContent->clazz;
332         //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
333 #endif
334 #if defined(WITH_JIT_TUNING)
335         gDvmJit.icPatchInit++;
336 #endif
337         COMPILER_TRACE_CHAINING(
338             ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
339                   cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
340     /* Check if this is a frequently missed clazz */
341     } else if (cellAddr->stagedClazz != newContent->clazz) {
342         /* Not proven to be frequent yet - build up the filter cache */
343         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
344 
345         cellAddr->stagedClazz = newContent->clazz;
346 
347         UPDATE_CODE_CACHE_PATCHES();
348         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
349 
350 #if defined(WITH_JIT_TUNING)
351         gDvmJit.icPatchRejected++;
352 #endif
353     /*
354      * Different classes but same method implementation - it is safe to just
355      * patch the class value without the need to stop the world.
356      */
357     } else if (cellAddr->method == newContent->method) {
358         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
359 
360         cellAddr->clazz = newContent->clazz;
361         /* No need to flush the cache here since the branch is not patched */
362         UPDATE_CODE_CACHE_PATCHES();
363 
364         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
365 
366 #if defined(WITH_JIT_TUNING)
367         gDvmJit.icPatchLockFree++;
368 #endif
369     /*
370      * Cannot patch the chaining cell inline - queue it until the next safe
371      * point.
372      */
373     } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE)  {
374         int index = gDvmJit.compilerICPatchIndex++;
375         const ClassObject *clazz = newContent->clazz;
376 
377         gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
378         gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
379         gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
380         gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
381         /* For verification purpose only */
382         gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;
383 
384 #if defined(WITH_JIT_TUNING)
385         gDvmJit.icPatchQueued++;
386 #endif
387         COMPILER_TRACE_CHAINING(
388             ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
389                   cellAddr, newContent->clazz->descriptor, newContent->method->name));
390     } else {
391     /* Queue is full - just drop this patch request */
392 #if defined(WITH_JIT_TUNING)
393         gDvmJit.icPatchDropped++;
394 #endif
395 
396         COMPILER_TRACE_CHAINING(
397             ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
398                   cellAddr, newContent->clazz->descriptor, newContent->method->name));
399     }
400 
401     dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
402     return result;
403 }
404 
405 /*
406  * This method is called from the invoke templates for virtual and interface
407  * methods to speculatively setup a chain to the callee. The templates are
408  * written in assembly and have setup method, cell, and clazz at r0, r2, and
409  * r3 respectively, so there is an unused argument in the list. Upon return one
410  * of the following three results may happen:
411  *   1) Chain is not setup because the callee is native. Reset the rechain
412  *      count to a big number so that it will take a long time before the next
413  *      rechain attempt happens.
414  *   2) Chain is not setup because the callee's translation has not been
415  *      created yet. Reset the rechain count to a small number and retry soon.
416  *   3) Ask all other threads to stop before patching this chaining cell.
417  *      This is required because another thread may have passed the class check
418  *      but hasn't reached the chaining cell yet to follow the chain. If we
419  *      patch the content before halting the other thread, there is a small
420  *      window in which that thread may follow the new but wrong chain and
421  *      invoke a different method.
422  */
423 const Method *dvmJitToPatchPredictedChain(const Method *method,
424                                           Thread *self,
425                                           PredictedChainingCell *cell,
426                                           const ClassObject *clazz)
427 {
428     int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
429     /* Don't come back here for a long time if the method is native */
430     if (dvmIsNativeMethod(method)) {
431         UNPROTECT_CODE_CACHE(cell, sizeof(*cell));
432 
433         /*
434          * Put a non-zero/bogus value in the clazz field so that it won't
435          * trigger immediate patching and will continue to fail to match with
436          * a real clazz pointer.
437          */
438         cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;
439 
440         UPDATE_CODE_CACHE_PATCHES();
441         PROTECT_CODE_CACHE(cell, sizeof(*cell));
442         COMPILER_TRACE_CHAINING(
443             ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
444                   cell, method->name));
445         goto done;
446     }
447     {
448     int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);
449 
450     /*
451      * Compilation not made yet for the callee. Reset the counter to a small
452      * value and come back to check soon.
453      */
454     if ((tgtAddr == 0) ||
455         ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
456         COMPILER_TRACE_CHAINING(
457             ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
458                   cell, method->clazz->descriptor, method->name));
459         goto done;
460     }
461 
462     PredictedChainingCell newCell;
463 
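    /* For a still-uninitialized cell keep the thread's current rechain count;
     * otherwise it stays at PREDICTED_CHAIN_COUNTER_RECHAIN (set above). */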
464     if (cell->clazz == NULL) {
465         newRechainCount = self->icRechainCount;
466     }
467 
468     int relOffset = (int) tgtAddr - (int)cell;
469     OpndSize immSize = estOpndSizeFromImm(relOffset);
470     int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
471     relOffset -= jumpSize;
472     COMPILER_TRACE_CHAINING(
473             ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
474                   cell, method->clazz->descriptor, method->name, jumpSize));
475     //can't use stream here since it is used by the compilation thread
476     dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch
477 
478     newCell.clazz = clazz;
479     newCell.method = method;
480 
481     /*
482      * Enter the work order to the queue and the chaining cell will be patched
483      * the next time a safe point is entered.
484      *
485      * If the enqueuing fails reset the rechain count to a normal value so that
486      * it won't get indefinitely delayed.
487      */
488     inlineCachePatchEnqueue(cell, &newCell);
489     }
490 done:
491     self->icRechainCount = newRechainCount;
492     return method;
493 }
494 
495 /*
496  * Unchain a trace given the starting address of the translation
497  * in the code cache.  Refer to the diagram in dvmCompilerAssembleLIR.
498  * For ARM, it returns the address following the last cell unchained.
499  * For IA, it returns NULL since cacheflush is not required for IA.
500  */
501 u4* dvmJitUnchain(void* codeAddr)
502 {
503     /* codeAddr is 4-byte aligned, so is chain cell count offset */
504     u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
505     u2 chainCellCountOffset = *pChainCellCountOffset;
506     /* chain cell counts information is 4-byte aligned */
507     ChainCellCounts *pChainCellCounts =
508           (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
509     u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
510     u2 chainCellOffset = *pChainCellOffset;
511     u1* pChainCells;
512     int i,j;
513     PredictedChainingCell *predChainCell;
514     int padding;
515 
516     /* Locate the beginning of the chain cell region */
517     pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);
518 
519     /* The cells are sorted in order - walk through them and reset */
520     for (i = 0; i < kChainingCellGap; i++) {
521         /* for hot, normal, singleton chaining:
522                nop  //padding.
523                jmp 0
524                mov imm32, reg1
525                mov imm32, reg2
526                call reg2
527            after chaining:
528                nop
529                jmp imm
530                mov imm32, reg1
531                mov imm32, reg2
532                call reg2
533            after unchaining:
534                nop
535                jmp 0
536                mov imm32, reg1
537                mov imm32, reg2
538                call reg2
539            Space occupied by the chaining cell in bytes: the nop is padding so
540                 that the "jmp 0" displacement is kept 4-byte aligned.
541            Space for predicted chaining: 5 words = 20 bytes
542         */
543         int elemSize = 0;
544         if (i == kChainingCellInvokePredicted) {
545             elemSize = 20;
546         }
547         COMPILER_TRACE_CHAINING(
548             ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));
549 
550         for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
551             switch(i) {
552                 case kChainingCellNormal:
553                 case kChainingCellHot:
554                 case kChainingCellInvokeSingleton:
555                 case kChainingCellBackwardBranch:
556                     COMPILER_TRACE_CHAINING(
557                         ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
558                     pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
559                     elemSize = 4+5+5+2;
560                     memset(pChainCells, 0, 4);
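                    /* The memset above zeroes the 4-byte jump displacement,
                     * restoring the cell to "jmp 0"; elemSize then covers that
                     * displacement plus two 5-byte "mov imm32, reg" instructions
                     * and the 2-byte indirect call (see the layout comment above). */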
561                     break;
562                 case kChainingCellInvokePredicted:
563                     COMPILER_TRACE_CHAINING(
564                         ALOGI("Jit Runtime: unchaining of predicted"));
565                     /* 4-byte aligned */
566                     padding = (4 - ((u4)pChainCells & 3)) & 3;
567                     pChainCells += padding;
568                     predChainCell = (PredictedChainingCell *) pChainCells;
569                     /*
570                      * There could be a race on another mutator thread to use
571                      * this particular predicted cell and the check has passed
572                      * the clazz comparison. So we cannot safely wipe the
573                      * method and branch but it is safe to clear the clazz,
574                      * which serves as the key.
575                      */
576                     predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
577                     break;
578                 default:
579                     ALOGE("Unexpected chaining type: %d", i);
580                     dvmAbort();  // dvmAbort OK here - can't safely recover
581             }
582             COMPILER_TRACE_CHAINING(
583                 ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
584             pChainCells += elemSize;  /* Advance by a fixed number of bytes */
585         }
586     }
587     return NULL;
588 }
589 
590 /* Unchain all translations in the cache. */
591 void dvmJitUnchainAll()
592 {
593     ALOGV("Jit Runtime: unchaining all");
594     if (gDvmJit.pJitEntryTable != NULL) {
595         COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
596         dvmLockMutex(&gDvmJit.tableLock);
597 
598         UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
599 
600         for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
601             if (gDvmJit.pJitEntryTable[i].dPC &&
602                 !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
603                 gDvmJit.pJitEntryTable[i].codeAddress) {
604                       dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
605             }
606         }
607 
608         PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
609 
610         dvmUnlockMutex(&gDvmJit.tableLock);
611         gDvmJit.translationChains = 0;
612     }
613     gDvmJit.hasNewChain = false;
614 }
615 
616 #define P_GPR_1 PhysicalReg_EBX
617 /* Add an additional jump instruction, keeping the jump's 32-bit target operand 4-byte aligned. */
618 static void insertJumpHelp()
619 {
620     int rem = (uint)stream % 4;
621     int nop_size = 3 - rem;
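    /* Emit (3 - rem) bytes of nop so that the following 5-byte "jmp 0" places its
     * 32-bit displacement on a 4-byte boundary, presumably so the displacement can
     * be updated in a single aligned write during chaining/unchaining. */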
622     dump_nop(nop_size);
623     unconditional_jump_int(0, OpndSize_32);
624     return;
625 }
626 
627 /* Chaining cell for code that may need warmup. */
628 /* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
629                  blx r0
630                  data 0xb23a //bytecode address: 0x5115b23a
631                  data 0x5115
632    IA32 assembly:
633                   jmp  0 //5 bytes
634                   movl address, %ebx
635                   movl dvmJitToInterpNormal, %eax
636                   call %eax
637                   <-- return address
638 */
639 static void handleNormalChainingCell(CompilationUnit *cUnit,
640                                      unsigned int offset, int blockId, LowOpBlockLabel* labelList)
641 {
642     ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
643           cUnit->method->name, blockId, offset, stream - streamMethodStart);
644     if(dump_x86_inst)
645         ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
646               offset, stream - streamMethodStart, stream);
647     /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
648      * resolve the multithreading issue.
649      */
650     insertJumpHelp();
651     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
652     scratchRegs[0] = PhysicalReg_EAX;
653     call_dvmJitToInterpNormal();
654     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
655 }
656 
657 /*
658  * Chaining cell for instructions that immediately follow already translated
659  * code.
660  */
661 static void handleHotChainingCell(CompilationUnit *cUnit,
662                                   unsigned int offset, int blockId, LowOpBlockLabel* labelList)
663 {
664     ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
665           cUnit->method->name, blockId, offset, stream - streamMethodStart);
666     if(dump_x86_inst)
667         ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
668               offset, stream - streamMethodStart, stream);
669     /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
670      * resolve the multithreading issue.
671      */
672     insertJumpHelp();
673     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
674     scratchRegs[0] = PhysicalReg_EAX;
675     call_dvmJitToInterpTraceSelect();
676     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
677 }
678 
679 /* Chaining cell for branches that branch back into the same basic block */
680 static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
681                                      unsigned int offset, int blockId, LowOpBlockLabel* labelList)
682 {
683     ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
684           cUnit->method->name, blockId, offset, stream - streamMethodStart);
685     if(dump_x86_inst)
686         ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
687               offset, stream - streamMethodStart, stream);
688     /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
689      * resolve the multithreading issue.
690      */
691     insertJumpHelp();
692     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
693     scratchRegs[0] = PhysicalReg_EAX;
694     call_dvmJitToInterpNormal();
695     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
696 }
697 
698 /* Chaining cell for monomorphic method invocations. */
699 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
700                                               const Method *callee, int blockId, LowOpBlockLabel* labelList)
701 {
702     ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
703           cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
704     if(dump_x86_inst)
705         ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
706               blockId, stream - streamMethodStart, stream);
707     /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
708      * resolve the multithreading issue.
709      */
710     insertJumpHelp();
711     move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
712     scratchRegs[0] = PhysicalReg_EAX;
713     call_dvmJitToInterpTraceSelect();
714     //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
715 }
716 #undef P_GPR_1
717 
718 /* Chaining cell for polymorphic (virtual/interface) method invocations. */
719 static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
720 {
721     if(dump_x86_inst)
722         ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
723               blockId, stream - streamMethodStart, stream);
724 #ifndef PREDICTED_CHAINING
725     //assume rPC for callee->insns in %ebx
726     scratchRegs[0] = PhysicalReg_EAX;
727 #if defined(WITH_JIT_TUNING)
728     /* Predicted chaining is not enabled. Fall back to interpreter and
729      * indicate that predicted chaining was not done.
730      */
731     move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
732 #endif
733     call_dvmJitToInterpTraceSelectNoChain();
734 #else
735     /* make sure the section for the predicted chaining cell is 4-byte aligned */
736     //int padding = (4 - ((u4)stream & 3)) & 3;
737     //stream += padding;
738     int* streamData = (int*)stream;
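    /* The five 32-bit words written below form the predicted chaining cell
     * (initial branch pair, class, method, rechain counter); 5 words = 20 bytes,
     * matching the "stream += 20" at the end of this function. */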
739     /* Should not be executed in the initial state */
740     streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
741     streamData[1] = 0;
742     /* To be filled: class */
743     streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
744     /* To be filled: method */
745     streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
746     /*
747      * Rechain count. The initial value of 0 here will trigger chaining upon
748      * the first invocation of this callsite.
749      */
750     streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
751 #if 0
752     ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
753           *((int*)(stream+8)), *((int*)(stream+12)));
754 #endif
755     stream += 20; //5 *4
756 #endif
757 }
758 
759 /* Load the Dalvik PC into r0 and jump to the specified target */
760 static void handlePCReconstruction(CompilationUnit *cUnit,
761                                    LowOpBlockLabel *targetLabel)
762 {
763 #if 0
764     LowOp **pcrLabel =
765         (LowOp **) cUnit->pcReconstructionList.elemList;
766     int numElems = cUnit->pcReconstructionList.numUsed;
767     int i;
768     for (i = 0; i < numElems; i++) {
769         dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
770         /* r0 = dalvik PC */
771         loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
772         genUnconditionalBranch(cUnit, targetLabel);
773     }
774 #endif
775 }
776 
777 //use O0 code generator for hoisted checks outside of the loop
778 /*
779  * vA = arrayReg;
780  * vB = idxReg;
781  * vC = endConditionReg;
782  * arg[0] = maxC
783  * arg[1] = minC
784  * arg[2] = loopBranchConditionCode
785  */
786 #define P_GPR_1 PhysicalReg_EBX
787 #define P_GPR_2 PhysicalReg_ECX
788 static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
789 {
790     /*
791      * NOTE: these synthesized blocks don't have ssa names assigned
792      * for Dalvik registers.  However, because they dominate the following
793      * blocks we can simply use the Dalvik name w/ subscript 0 as the
794      * ssa name.
795      */
796     DecodedInstruction *dInsn = &mir->dalvikInsn;
797     const int maxC = dInsn->arg[0];
798 
799     /* assign array in virtual register to P_GPR_1 */
800     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
801     /* assign index in virtual register to P_GPR_2 */
802     get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
803     export_pc();
804     compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
805     condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
806     int delta = maxC;
807     /*
808      * If the loop end condition is ">=" instead of ">", then the largest value
809      * of the index is "endCondition - 1".
810      */
811     if (dInsn->arg[2] == OP_IF_GE) {
812         delta--;
813     }
814 
815     if (delta < 0) { //+delta
816         //if P_GPR_2 is mapped to a VR, we can't do this
817         alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
818     } else if(delta > 0) {
819         alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
820     }
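    /* Bound check: compare the largest index the loop can reach against the
     * array length and branch to the exception block when it is out of range
     * (the earlier compare against 0 is the null check on the array). */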
821     compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
822     condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
823 }
824 
825 /*
826  * vA = arrayReg;
827  * vB = idxReg;
828  * vC = endConditionReg;
829  * arg[0] = maxC
830  * arg[1] = minC
831  * arg[2] = loopBranchConditionCode
832  */
833 static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
834 {
835     DecodedInstruction *dInsn = &mir->dalvikInsn;
836     const int maxC = dInsn->arg[0];
837 
838     /* assign array in virtual register to P_GPR_1 */
839     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
840     /* assign index in virtual register to P_GPR_2 */
841     get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
842     export_pc();
843     compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
844     condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
845 
846     if (maxC < 0) {
847         //if P_GPR_2 is mapped to a VR, we can't do this
848         alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
849     } else if(maxC > 0) {
850         alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
851     }
852     compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
853     condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
854 
855 }
856 #undef P_GPR_1
857 #undef P_GPR_2
858 
859 /*
860  * vA = idxReg;
861  * vB = minC;
862  */
863 #define P_GPR_1 PhysicalReg_ECX
864 static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
865 {
866     DecodedInstruction *dInsn = &mir->dalvikInsn;
867     const int minC = dInsn->vB;
868     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
869     export_pc();
870     compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
871     condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
872 }
873 #undef P_GPR_1
874 
875 #ifdef WITH_JIT_INLINING
876 static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
877 {
878     CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
879     if(gDvm.executionMode == kExecutionModeNcgO0) {
880         get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
881         move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
882         compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
883         export_pc(); //use %edx
884         conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
885         move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
886         compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
887     } else {
888         get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
889         move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
890         nullCheck(5, false, 1, mir->dalvikInsn.vC);
891         move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
892         compare_reg_reg(4, false, 6, false);
893     }
894 
895     //immediate will be updated later in genLandingPadForMispredictedCallee
896     streamMisPred = stream;
897     callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
898 }
899 #endif
900 
901 /* Extended MIR instructions like PHI */
902 void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
903 {
904     ExecutionMode origMode = gDvm.executionMode;
905     gDvm.executionMode = kExecutionModeNcgO0;
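    /* Temporarily force the O0 code generator for the hoisted checks handled
     * below (see the note above genHoistedChecksForCountUpLoop); the original
     * execution mode is restored at the end of this function. */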
906     switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
907         case kMirOpPhi: {
908             break;
909         }
910         case kMirOpNullNRangeUpCheck: {
911             genHoistedChecksForCountUpLoop(cUnit, mir);
912             break;
913         }
914         case kMirOpNullNRangeDownCheck: {
915             genHoistedChecksForCountDownLoop(cUnit, mir);
916             break;
917         }
918         case kMirOpLowerBound: {
919             genHoistedLowerBoundCheck(cUnit, mir);
920             break;
921         }
922         case kMirOpPunt: {
923             break;
924         }
925 #ifdef WITH_JIT_INLINING
926         case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
927             genValidationForPredictedInline(cUnit, mir);
928             break;
929         }
930 #endif
931         default:
932             break;
933     }
934     gDvm.executionMode = origMode;
935 }
936 
937 static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
938                                 int bodyId)
939 {
940     /*
941      * Next, create two branches - one branch over to the loop body and the
942      * other branch to the PCR cell to punt.
943      */
944     //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
945     //setupResourceMasks(branchToBody);
946     //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);
947 
948 #if 0
949     LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
950     branchToPCR->opCode = kThumbBUncond;
951     branchToPCR->generic.target = (LIR *) pcrLabel;
952     setupResourceMasks(branchToPCR);
953     cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
954 #endif
955 }
956 
957 /* check whether we can merge the given block with its taken (target) block */
958 bool mergeBlock(BasicBlock *bb) {
959     if(bb->blockType == kDalvikByteCode &&
960        bb->firstMIRInsn != NULL &&
961        (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
962         bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
963         bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
964        bb->fallThrough == NULL) {// &&
965        //cUnit->hasLoop) {
966         //ALOGI("merge blocks ending with goto at index %d", i);
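        /* Splice the taken block's MIRs in place of the trailing goto, then
         * inherit the taken block's fallThrough/taken successors. */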
967         MIR* prevInsn = bb->lastMIRInsn->prev;
968         if(bb->taken == NULL) return false;
969         MIR* mergeInsn = bb->taken->firstMIRInsn;
970         if(mergeInsn == NULL) return false;
971         if(prevInsn == NULL) {//the block has a single instruction
972             bb->firstMIRInsn = mergeInsn;
973         } else {
974             prevInsn->next = mergeInsn; //remove goto from the chain
975         }
976         mergeInsn->prev = prevInsn;
977         bb->lastMIRInsn = bb->taken->lastMIRInsn;
978         bb->taken->firstMIRInsn = NULL; //block being merged in
979         bb->fallThrough = bb->taken->fallThrough;
980         bb->taken = bb->taken->taken;
981         return true;
982     }
983     return false;
984 }
985 
986 static int genTraceProfileEntry(CompilationUnit *cUnit)
987 {
988     cUnit->headerSize = 6;
989     if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
990         (gDvmJit.profileMode == kTraceProfilingDisabled)) {
991         return 12;
992     } else {
993         return 4;
994     }
995 
996 }
997 
998 #define PRINT_BUFFER_LEN 1024
999 /* Print the code block in the code cache in the range [startAddr, endAddr)
1000  * in readable format.
1001  */
1002 void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
1003 {
1004     char strbuf[PRINT_BUFFER_LEN];
1005     unsigned char *addr;
1006     unsigned char *next_addr;
1007     int n;
1008 
1009     if (gDvmJit.printBinary) {
1010         // print binary in bytes
1011         n = 0;
1012         for (addr = startAddr; addr < endAddr; addr++) {
1013             n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
1014             if (n > PRINT_BUFFER_LEN - 10) {
1015                 ALOGD("## %s", strbuf);
1016                 n = 0;
1017             }
1018         }
1019         if (n > 0)
1020             ALOGD("## %s", strbuf);
1021     }
1022 
1023     // print disassembled instructions
1024     addr = startAddr;
1025     while (addr < endAddr) {
1026         next_addr = reinterpret_cast<unsigned char*>
1027             (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
1028                                        strbuf, PRINT_BUFFER_LEN));
1029         if (addr != next_addr) {
1030             ALOGD("**  %p: %s", addr, strbuf);
1031         } else {                // check whether this is nop padding
1032             if (addr[0] == 0x90) {
1033                 ALOGD("**  %p: NOP (1 byte)", addr);
1034                 next_addr += 1;
1035             } else if (addr[0] == 0x66 && addr[1] == 0x90) {
1036                 ALOGD("**  %p: NOP (2 bytes)", addr);
1037                 next_addr += 2;
1038             } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
1039                 ALOGD("**  %p: NOP (3 bytes)", addr);
1040                 next_addr += 3;
1041             } else {
1042                 ALOGD("** unable to decode binary at %p", addr);
1043                 break;
1044             }
1045         }
1046         addr = next_addr;
1047     }
1048 }
1049 
1050 /* 4 is the number of additional bytes needed for chaining information for trace:
1051  * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */
1052 #define EXTRA_BYTES_FOR_CHAINING 4
1053 
1054 /* Entry function to invoke the backend of the JIT compiler */
1055 void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
1056 {
1057     dump_x86_inst = cUnit->printMe;
1058     /* Used to hold the labels of each block */
1059     LowOpBlockLabel *labelList =
1060         (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
1061     LowOp *headLIR = NULL;
1062     GrowableList chainingListByType[kChainingCellLast];
1063     unsigned int i, padding;
1064 
1065     /*
1066      * Initialize the chaining lists for the various cell types.
1067      */
1068     for (i = 0; i < kChainingCellLast; i++) {
1069         dvmInitGrowableList(&chainingListByType[i], 2);
1070     }
1071 
1072     /* Clear the visited flag for each block */
1073     dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
1074                                           kAllNodes, false /* isIterative */);
1075 
1076     GrowableListIterator iterator;
1077     dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
1078 
1079     /* Traces start with a profiling entry point.  Generate it here */
1080     cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
1081 
1082     //BasicBlock **blockList = cUnit->blockList;
1083     GrowableList *blockList = &cUnit->blockList;
1084     BasicBlock *bb;
1085 
1086     info->codeAddress = NULL;
1087     stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
1088     streamStart = stream; /* trace start before alignment */
1089 
1090     // TODO: compile into a temporary buffer and then copy into the code cache.
1091     // That would let us leave the code cache unprotected for a shorter time.
1092     size_t unprotected_code_cache_bytes =
1093             gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed;
1094     UNPROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1095 
1096     stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
1097     stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
1098     streamMethodStart = stream; /* code start */
1099     for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
1100         labelList[i].lop.generic.offset = -1;
1101     }
1102     cUnit->exceptionBlockId = -1;
1103     for (i = 0; i < blockList->numUsed; i++) {
1104         bb = (BasicBlock *) blockList->elemList[i];
1105         if(bb->blockType == kExceptionHandling)
1106             cUnit->exceptionBlockId = i;
1107     }
1108     startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
1109     if(gDvm.executionMode == kExecutionModeNcgO1) {
1110         //merge blocks ending with "goto" with the fall through block
1111         if (cUnit->jitMode != kJitLoop)
1112             for (i = 0; i < blockList->numUsed; i++) {
1113                 bb = (BasicBlock *) blockList->elemList[i];
1114                 bool merged = mergeBlock(bb);
1115                 while(merged) merged = mergeBlock(bb);
1116             }
1117         for (i = 0; i < blockList->numUsed; i++) {
1118             bb = (BasicBlock *) blockList->elemList[i];
1119             if(bb->blockType == kDalvikByteCode &&
1120                bb->firstMIRInsn != NULL) {
1121                 preprocessingBB(bb);
1122             }
1123         }
1124         preprocessingTrace();
1125     }
1126 
1127     /* Handle the content in each basic block */
1128     for (i = 0; ; i++) {
1129         MIR *mir;
1130         bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
1131         if (bb == NULL) break;
1132         if (bb->visited == true) continue;
1133 
1134         labelList[i].immOpnd.value = bb->startOffset;
1135 
1136         if (bb->blockType >= kChainingCellLast) {
1137             /*
1138              * Append the label pseudo LIR first. Chaining cells will be handled
1139              * separately afterwards.
1140              */
1141             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
1142         }
1143 
1144         if (bb->blockType == kEntryBlock) {
1145             labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
1146             if (bb->firstMIRInsn == NULL) {
1147                 continue;
1148             } else {
1149               setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
1150                                   //&labelList[blockList[i]->fallThrough->id]);
1151             }
1152         } else if (bb->blockType == kExitBlock) {
1153             labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
1154             labelList[i].lop.generic.offset = (stream - streamMethodStart);
1155             goto gen_fallthrough;
1156         } else if (bb->blockType == kDalvikByteCode) {
1157             if (bb->hidden == true) continue;
1158             labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
1159             /* Reset the register state */
1160 #if 0
1161             resetRegisterScoreboard(cUnit);
1162 #endif
1163         } else {
1164             switch (bb->blockType) {
1165                 case kChainingCellNormal:
1166                     labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
1167                     /* handle the codegen later */
1168                     dvmInsertGrowableList(
1169                         &chainingListByType[kChainingCellNormal], i);
1170                     break;
1171                 case kChainingCellInvokeSingleton:
1172                     labelList[i].lop.opCode2 =
1173                         ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
1174                     labelList[i].immOpnd.value =
1175                         (int) bb->containingMethod;
1176                     /* handle the codegen later */
1177                     dvmInsertGrowableList(
1178                         &chainingListByType[kChainingCellInvokeSingleton], i);
1179                     break;
1180                 case kChainingCellInvokePredicted:
1181                     labelList[i].lop.opCode2 =
1182                         ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
1183                    /*
1184                      * Move the cached method pointer from operand 1 to 0.
1185                      * Operand 0 was clobbered earlier in this routine to store
1186                      * the block starting offset, which is not applicable to
1187                      * predicted chaining cell.
1188                      */
1189                     //TODO
1190                     //labelList[i].operands[0] = labelList[i].operands[1];
1191 
1192                     /* handle the codegen later */
1193                     dvmInsertGrowableList(
1194                         &chainingListByType[kChainingCellInvokePredicted], i);
1195                     break;
1196                 case kChainingCellHot:
1197                     labelList[i].lop.opCode2 =
1198                         ATOM_PSEUDO_CHAINING_CELL_HOT;
1199                     /* handle the codegen later */
1200                     dvmInsertGrowableList(
1201                         &chainingListByType[kChainingCellHot], i);
1202                     break;
1203                 case kPCReconstruction:
1204                     /* Make sure exception handling block is next */
1205                     labelList[i].lop.opCode2 =
1206                         ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
1207                     //assert (i == cUnit->numBlocks - 2);
1208                     labelList[i].lop.generic.offset = (stream - streamMethodStart);
1209                     handlePCReconstruction(cUnit,
1210                                            &labelList[cUnit->puntBlock->id]);
1211                     break;
1212                 case kExceptionHandling:
1213                     labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
1214                     labelList[i].lop.generic.offset = (stream - streamMethodStart);
1215                     //if (cUnit->pcReconstructionList.numUsed) {
1216                         scratchRegs[0] = PhysicalReg_EAX;
1217                         jumpToInterpPunt();
1218                         //call_dvmJitToInterpPunt();
1219                     //}
1220                     break;
1221                 case kChainingCellBackwardBranch:
1222                     labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
1223                     /* handle the codegen later */
1224                     dvmInsertGrowableList(
1225                         &chainingListByType[kChainingCellBackwardBranch],
1226                         i);
1227                     break;
1228                 default:
1229                     break;
1230             }
1231             continue;
1232         }
1233         {
1234         //LowOp *headLIR = NULL;
1235         const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
1236         const u2 *startCodePtr = dexCode->insns;
1237         const u2 *codePtr;
1238         labelList[i].lop.generic.offset = (stream - streamMethodStart);
1239         ALOGV("get ready to handle JIT bb %d type %d hidden %d",
1240               bb->id, bb->blockType, bb->hidden);
1241         for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
1242             bb = nextBB;
1243             bb->visited = true;
1244             cUnit->nextCodegenBlock = NULL;
1245 
1246         if(gDvm.executionMode == kExecutionModeNcgO1 &&
1247            bb->blockType != kEntryBlock &&
1248            bb->firstMIRInsn != NULL) {
1249             startOfBasicBlock(bb);
1250             int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
1251             endOfBasicBlock(bb);
1252             if(cg_ret < 0) {
1253                 endOfTrace(true/*freeOnly*/);
1254                 cUnit->baseAddr = NULL;
1255                 PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1256                 return;
1257             }
1258         } else {
1259         for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
1260             startOfBasicBlock(bb); //why here for O0
1261             Opcode dalvikOpCode = mir->dalvikInsn.opcode;
1262             if((int)dalvikOpCode >= (int)kMirOpFirst) {
1263                 handleExtendedMIR(cUnit, mir);
1264                 continue;
1265             }
1266             InstructionFormat dalvikFormat =
1267                 dexGetFormatFromOpcode(dalvikOpCode);
1268             ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
1269                   mir->offset, dalvikOpCode, dalvikFormat);
1270             LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
1271             /* Remember the first LIR for this block */
1272             if (headLIR == NULL) {
1273                 headLIR = (LowOp*)boundaryLIR;
1274             }
1275             bool notHandled = true;
1276             /*
1277              * Debugging: screen the opcode first to see if it is in the
1278              * do[-not]-compile list
1279              */
1280             bool singleStepMe =
1281                 gDvmJit.includeSelectedOp !=
1282                 ((gDvmJit.opList[dalvikOpCode >> 3] &
1283                   (1 << (dalvikOpCode & 0x7))) !=
1284                  0);
1285             if (singleStepMe || cUnit->allSingleStep) {
1286             } else {
1287                 codePtr = startCodePtr + mir->offset;
1288                 //lower each byte code, update LIR
1289                 notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
1290                 if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
1291                    CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1292                     ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
1293                     gDvmJit.codeCacheFull = true;
1294                     cUnit->baseAddr = NULL;
1295                     endOfTrace(true/*freeOnly*/);
1296                     PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1297                     return;
1298                 }
1299             }
1300             if (notHandled) {
1301                 ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
1302                      mir->offset,
1303                      dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
1304                      dalvikFormat);
1305                 dvmAbort();
1306                 break;
1307             }
1308         } // end for
1309         } // end else //JIT + O0 code generator
1310         }
1311         } // end for
1312         /* Eliminate redundant loads/stores and delay stores into later slots */
1313 #if 0
1314         dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
1315                                            cUnit->lastLIRInsn);
1316 #endif
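        /*
         * headLIR is only consumed by the (currently disabled) local
         * optimization pass above; reset it so the next block starts a
         * fresh range.
         */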
1317         if (headLIR) headLIR = NULL;
1318 gen_fallthrough:
1319         /*
1320          * If the block was terminated because of the trace length constraint,
1321          * insert an unconditional branch to the chaining cell of the fall-through block.
1322          */
1323         if (bb->needFallThroughBranch) {
1324             jumpToBasicBlock(stream, bb->fallThrough->id);
1325         }
1326 
1327     }
1328 
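    /*
     * Record where the chaining cells start so their offset from codeAddr
     * can be written into the trace header below.
     */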
1329     char* streamChainingStart = (char*)stream;
1330     /* Handle the chaining cells in predefined order */
1331     for (i = 0; i < kChainingCellGap; i++) {
1332         size_t j;
1333         int *blockIdList = (int *) chainingListByType[i].elemList;
1334 
1335         cUnit->numChainingCells[i] = chainingListByType[i].numUsed;
1336 
1337         /* No chaining cells of this type */
1338         if (cUnit->numChainingCells[i] == 0)
1339             continue;
1340 
1341         /* Record the first LIR for a new type of chaining cell */
1342         cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
1343         for (j = 0; j < chainingListByType[i].numUsed; j++) {
1344             int blockId = blockIdList[j];
1345             BasicBlock *chainingBlock =
1346                 (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
1347                                                          blockId);
1348 
1349             labelList[blockId].lop.generic.offset = (stream - streamMethodStart);
1350 
1351             /* Align this chaining cell first */
1352 #if 0
1353             newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
1354 #endif
1355             /* Insert the pseudo chaining instruction */
1356             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
1357 
1358 
1359             switch (chainingBlock->blockType) {
1360                 case kChainingCellNormal:
1361                     handleNormalChainingCell(cUnit,
1362                      chainingBlock->startOffset, blockId, labelList);
1363                     break;
1364                 case kChainingCellInvokeSingleton:
1365                     handleInvokeSingletonChainingCell(cUnit,
1366                         chainingBlock->containingMethod, blockId, labelList);
1367                     break;
1368                 case kChainingCellInvokePredicted:
1369                     handleInvokePredictedChainingCell(cUnit, blockId);
1370                     break;
1371                 case kChainingCellHot:
1372                     handleHotChainingCell(cUnit,
1373                         chainingBlock->startOffset, blockId, labelList);
1374                     break;
1375                 case kChainingCellBackwardBranch:
1376                     handleBackwardBranchChainingCell(cUnit,
1377                         chainingBlock->startOffset, blockId, labelList);
1378                     break;
1379                 default:
1380                     ALOGE("Bad blocktype %d", chainingBlock->blockType);
1381                     dvmAbort();
1382                     break;
1383             }
1384 
1385             if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1386                 ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
1387                 gDvmJit.codeCacheFull = true;
1388                 cUnit->baseAddr = NULL;
1389                 endOfTrace(true); /* need to free structures */
1390                 PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1391                 return;
1392             }
1393         }
1394     }
1395 #if 0
1396     dvmCompilerApplyGlobalOptimizations(cUnit);
1397 #endif
1398     endOfTrace(false);
1399 
1400     if (gDvmJit.codeCacheFull) {
1401         /* We hit the code cache size limit inside endOfTrace(false).
1402          * Bail out for this trace!
1403          */
1404         ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
1405         cUnit->baseAddr = NULL;
1406         PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1407         return;
1408     }
1409 
1410     /* Dump the section for chaining cell counts; make sure it is 4-byte aligned */
1411     padding = (4 - ((u4)stream & 3)) & 3;
1412     stream += padding;
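    /*
     * e.g. if stream ends in ...2, (4 - 2) & 3 == 2 bytes of padding; if it
     * is already 4-byte aligned, (4 - 0) & 3 == 0.
     */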
1413     ChainCellCounts chainCellCounts;
1414     /* Install the chaining cell counts */
1415     for (i=0; i< kChainingCellGap; i++) {
1416         chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
1417     }
1418     char* streamCountStart = (char*)stream;
1419     memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
1420     stream += sizeof(chainCellCounts);
1421 
1422     cUnit->baseAddr = streamMethodStart;
1423     cUnit->totalSize = (stream - streamStart);
1424     if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1425         ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
1426         gDvmJit.codeCacheFull = true;
1427         cUnit->baseAddr = NULL;
1428         PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1429         return;
1430     }
1431 
1432     /* write chaining cell count offset & chaining cell offset */
1433     u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
1434     *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
1435     pOffset[1] = streamChainingStart - streamMethodStart;
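    /*
     * Resulting layout of the emitted trace in the code cache:
     *   codeAddr - EXTRA_BYTES_FOR_CHAINING : u2[0] offset to chain cell
     *                                         counts, u2[1] offset to the
     *                                         first chaining cell
     *   codeAddr (streamMethodStart)        : trace body
     *   streamChainingStart                 : chaining cells, by type
     *   streamCountStart (4-byte aligned)   : ChainCellCounts
     */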
1436 
1437     PROTECT_CODE_CACHE(streamStart, unprotected_code_cache_bytes);
1438 
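    /* Commit the bytes only now, after every size check above has passed */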
1439     gDvmJit.codeCacheByteUsed += (stream - streamStart);
1440     if (cUnit->printMe) {
1441         unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
1442         unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
1443         ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
1444               cUnit->method->clazz->descriptor, cUnit->method->name,
1445               codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
1446         ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
1447               cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
1448         printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
1449     }
1450     ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
1451           (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
1452           cUnit->totalSize, gDvmJit.codeCache);
1453 
1454     gDvmJit.numCompilations++;
1455 
1456     info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
1457 }
1458 
1459 /*
1460  * Perform translation chain operation.
1461  */
1462 void* dvmJitChain(void* tgtAddr, u4* branchAddr)
1463 {
1464 #ifdef JIT_CHAIN
1465     int relOffset = (int) tgtAddr - (int)branchAddr;
1466 
1467     if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
1468         (gDvmJit.codeCacheFull == false)) {
1469 
1470         gDvmJit.translationChains++;
1471 
1472         //OpndSize immSize = estOpndSizeFromImm(relOffset);
1473         //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
1474         /* The jump operand size is hard-coded to 32 bits. This instruction replaces the
1475          * "jump 0" placeholder in the original code sequence.
1476          */
1477         OpndSize immSize = OpndSize_32;
1478         relOffset -= 5;
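        /*
         * A 32-bit relative JMP on IA32 (0xE9 + rel32) is 5 bytes and its
         * displacement is measured from the end of the instruction, hence
         * the adjustment by 5.
         */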
1479         //can't use stream here since it is used by the compilation thread
1480         UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1481         dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
1482         PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1483 
1484         gDvmJit.hasNewChain = true;
1485 
1486         COMPILER_TRACE_CHAINING(
1487             ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
1488                   (int) branchAddr, tgtAddr, relOffset));
1489     }
1490 #endif
1491     return tgtAddr;
1492 }
1493 
1494 /*
1495  * Accept the work and start compiling.  Returns true if compilation
1496  * is attempted.
1497  */
1498 bool dvmCompilerDoWork(CompilerWorkOrder *work)
1499 {
1500     JitTraceDescription *desc;
1501     bool isCompile;
1502     bool success = true;
1503 
1504     if (gDvmJit.codeCacheFull) {
1505         return false;
1506     }
1507 
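    /*
     * Work orders: compile a trace, compile a trace with verbose output
     * forced on, or switch the trace profiling mode.
     */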
1508     switch (work->kind) {
1509         case kWorkOrderTrace:
1510             isCompile = true;
1511             /* Start compilation with maximally allowed trace length */
1512             desc = (JitTraceDescription *)work->info;
1513             success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1514                                         work->bailPtr, 0 /* no hints */);
1515             break;
1516         case kWorkOrderTraceDebug: {
1517             bool oldPrintMe = gDvmJit.printMe;
1518             gDvmJit.printMe = true;
1519             isCompile = true;
1520             /* Start compilation with maximally allowed trace length */
1521             desc = (JitTraceDescription *)work->info;
1522             success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1523                                         work->bailPtr, 0 /* no hints */);
1524             gDvmJit.printMe = oldPrintMe;
1525             break;
1526         }
1527         case kWorkOrderProfileMode:
1528             dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
1529             isCompile = false;
1530             break;
1531         default:
1532             isCompile = false;
1533             ALOGE("Jit: unknown work order type");
1534             assert(0);  // Bail if debug build, discard otherwise
1535     }
1536     if (!success)
1537         work->result.codeAddress = NULL;
1538     return isCompile;
1539 }
1540 
1541 void dvmCompilerCacheFlush(long start, long end, long flags) {
1542   /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
1543 }
1544 
1545 //#endif
1546