1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <sys/mman.h>
17 #include "Dalvik.h"
18 #include "libdex/DexOpcodes.h"
19 #include "compiler/Compiler.h"
20 #include "compiler/CompilerIR.h"
21 #include "interp/Jit.h"
22 #include "libdex/DexFile.h"
23 #include "Lower.h"
24 #include "NcgAot.h"
25 #include "compiler/codegen/CompilerCodegen.h"
26 
27 /* Init values when a predicted chain is initially assembled */
28 /* E7FE is branch to self */
29 #define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe
30 
31 /* Target-specific save/restore */
32 extern "C" void dvmJitCalleeSave(double *saveArea);
33 extern "C" void dvmJitCalleeRestore(double *saveArea);
34 
35 /*
36  * Determine the initial instruction set to be used for this trace.
37  * Later components may decide to change this.
38  */
39 //JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
40 JitInstructionSetType dvmCompilerInstructionSet(void)
41 {
42     return DALVIK_JIT_IA32;
43 }
44 
45 JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
46 {
47     return DALVIK_JIT_IA32;
48 }
49 
50 /* we don't use a template for IA32 */
51 void *dvmCompilerGetInterpretTemplate()
52 {
53       return NULL;
54 }
55 
56 /* Track the number of times that the code cache is patched */
57 #if defined(WITH_JIT_TUNING)
58 #define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
59 #else
60 #define UPDATE_CODE_CACHE_PATCHES()
61 #endif
62 
63 bool dvmCompilerArchInit() {
64     /* Target-specific configuration */
65     gDvmJit.jitTableSize = 1 << 12;
66     gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
67     if (gDvmJit.threshold == 0) {
68         gDvmJit.threshold = 255;
69     }
70     gDvmJit.codeCacheSize = 512*1024;
71     gDvmJit.optLevel = kJitOptLevelO1;
72 
73     //Disable Method-JIT
74     gDvmJit.disableOpt |= (1 << kMethodJit);
75 
76 #if defined(WITH_SELF_VERIFICATION)
77     /* Force into blocking mode */
78     gDvmJit.blockingMode = true;
79     gDvm.nativeDebuggerActive = true;
80 #endif
81 
82     // Make sure all threads have current values
83     dvmJitUpdateThreadStateAll();
84 
85     return true;
86 }
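/*
 * Editorial note (not part of the original source): jitTableSize is a power
 * of two (1 << 12 = 4096), so jitTableMask = jitTableSize - 1 = 0xFFF.
 * Presumably the JIT lookup folds a hash into a table index with a single
 * AND, e.g. index = hash & gDvmJit.jitTableMask; that mask trick only works
 * for power-of-two table sizes.
 */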
87 
88 void dvmCompilerPatchInlineCache(void)
89 {
90     int i;
91     PredictedChainingCell *minAddr, *maxAddr;
92 
93     /* Nothing to be done */
94     if (gDvmJit.compilerICPatchIndex == 0) return;
95 
96     /*
97      * Since all threads are already stopped we don't really need to acquire
98  * the lock. But race conditions could easily be introduced in the future
99  * if this is overlooked, so we still acquire the lock here.
100      */
101     dvmLockMutex(&gDvmJit.compilerICPatchLock);
102 
103     UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
104 
105     //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);
106 
107     /* Initialize the min/max address range */
108     minAddr = (PredictedChainingCell *)
109         ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
110     maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;
111 
112     for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
113         ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
114         PredictedChainingCell *cellAddr = workOrder->cellAddr;
115         PredictedChainingCell *cellContent = &workOrder->cellContent;
116         ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
117                                                 workOrder->classLoader);
118 
119         assert(clazz->serialNumber == workOrder->serialNumber);
120 
121         /* Use the newly resolved clazz pointer */
122         cellContent->clazz = clazz;
123 
124         if (cellAddr->clazz == NULL) {
125             COMPILER_TRACE_CHAINING(
126                 ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
127                       cellAddr,
128                       cellContent->clazz->descriptor,
129                       cellContent->method->name));
130         } else {
131             COMPILER_TRACE_CHAINING(
132                 ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
133                       "patched",
134                       cellAddr,
135                       cellAddr->clazz->descriptor,
136                       cellContent->clazz->descriptor,
137                       cellContent->method->name));
138         }
139 
140         /* Patch the chaining cell */
141         *cellAddr = *cellContent;
142         minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
143         maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
144     }
145 
146     PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
147 
148     gDvmJit.compilerICPatchIndex = 0;
149     dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
150 }
151 
152 /* Target-specific cache clearing */
153 void dvmCompilerCacheClear(char *start, size_t size)
154 {
155     /* "0xFF 0xFF" is an invalid opcode for x86. */
156     memset(start, 0xFF, size);
157 }
158 
159 /* for JIT debugging, to be implemented */
160 void dvmJitCalleeSave(double *saveArea) {
161 }
162 
163 void dvmJitCalleeRestore(double *saveArea) {
164 }
165 
166 void dvmJitToInterpSingleStep() {
167 }
168 
169 JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
170                                             const JitEntry *knownEntry) {
171     return NULL;
172 }
173 
174 void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
175 {
176 }
177 
178 void dvmCompilerArchDump(void)
179 {
180 }
181 
182 char *getTraceBase(const JitEntry *p)
183 {
184     return NULL;
185 }
186 
187 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
188 {
189 }
190 
191 void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
192 {
193 }
194 
195 void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
196 {
197     // Method-based JIT not supported for x86.
198 }
199 
200 void dvmJitScanAllClassPointers(void (*callback)(void *))
201 {
202 }
203 
204 /* Handy function to retrieve the profile count */
205 static inline int getProfileCount(const JitEntry *entry)
206 {
207     if (entry->dPC == 0 || entry->codeAddress == 0)
208         return 0;
209     u4 *pExecutionCount = (u4 *) getTraceBase(entry);
210 
211     return pExecutionCount ? *pExecutionCount : 0;
212 }
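/*
 * Editorial note (not part of the original source): getTraceBase() above is
 * stubbed to return NULL for IA32, so this helper always returns 0 here and
 * the profile dump below will report an average execution count of 0.
 */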
213 
214 /* qsort callback function */
215 static int sortTraceProfileCount(const void *entry1, const void *entry2)
216 {
217     const JitEntry *jitEntry1 = (const JitEntry *)entry1;
218     const JitEntry *jitEntry2 = (const JitEntry *)entry2;
219 
220     JitTraceCounter_t count1 = getProfileCount(jitEntry1);
221     JitTraceCounter_t count2 = getProfileCount(jitEntry2);
222     return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
223 }
224 
225 /* Sort the trace profile counts and dump them */
226 void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
227 {
228     JitEntry *sortedEntries;
229     int numTraces = 0;
230     unsigned long counts = 0;
231     unsigned int i;
232 
233     /* Make sure that the table is not changing */
234     dvmLockMutex(&gDvmJit.tableLock);
235 
236     /* Sort the entries by descending order */
237     sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
238     if (sortedEntries == NULL)
239         goto done;
240     memcpy(sortedEntries, gDvmJit.pJitEntryTable,
241            sizeof(JitEntry) * gDvmJit.jitTableSize);
242     qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
243           sortTraceProfileCount);
244 
245     /* Dump the sorted entries */
246     for (i=0; i < gDvmJit.jitTableSize; i++) {
247         if (sortedEntries[i].dPC != 0) {
248             numTraces++;
249         }
250     }
251     if (numTraces == 0)
252         numTraces = 1;
253     ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));
254 
255     free(sortedEntries);
256 done:
257     dvmUnlockMutex(&gDvmJit.tableLock);
258     return;
259 }
260 
261 void jumpWithRelOffset(char* instAddr, int relOffset) {
262     stream = instAddr;
263     OpndSize immSize = estOpndSizeFromImm(relOffset);
264     relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
265     dump_imm(Mnemonic_JMP, immSize, relOffset);
266 }
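/*
 * Editorial note (not part of the original source): x86 relative jumps are
 * encoded relative to the address of the *next* instruction, which is why
 * the jump instruction size is subtracted from relOffset above. For example,
 * assuming a 5-byte "jmp rel32" at address A targeting address T, the
 * encoded displacement is T - (A + 5) = (T - A) - 5.
 */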
267 
268 // works whether or not instructions for the target basic block have been generated
269 LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
270     stream = instAddr;
271     bool unknown;
272     OpndSize size;
273     int relativeNCG = targetId;
274     relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
275     unconditional_jump_int(relativeNCG, size);
276     return NULL;
277 }
278 
279 LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
280     stream = instAddr;
281     bool unknown;
282     OpndSize size;
283     int relativeNCG = targetId;
284     relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
285     conditional_jump_int(cc, relativeNCG, size);
286     return NULL;
287 }
288 
289 /*
290  * Attempt to enqueue a work order to patch an inline cache for a predicted
291  * chaining cell for virtual/interface calls.
292  */
293 static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
294                                     PredictedChainingCell *newContent)
295 {
296     bool result = true;
297 
298     /*
299      * Make sure only one thread gets here since updating the cell (ie the
300      * fast path) and queueing the request (ie the queued path) have to be
301      * done in an atomic fashion.
302      */
303     dvmLockMutex(&gDvmJit.compilerICPatchLock);
304 
305     /* Fast path for uninitialized chaining cell */
306     if (cellAddr->clazz == NULL &&
307         cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
308         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
309 
310         cellAddr->method = newContent->method;
311         cellAddr->branch = newContent->branch;
312         cellAddr->branch2 = newContent->branch2;
313 
314         /*
315          * The update order matters - make sure clazz is updated last since it
316          * will bring the uninitialized chaining cell to life.
317          */
318         android_atomic_release_store((int32_t)newContent->clazz,
319             (volatile int32_t *)(void*) &cellAddr->clazz);
320         //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
321         UPDATE_CODE_CACHE_PATCHES();
322 
323         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
324 
325 #if 0
326         MEM_BARRIER();
327         cellAddr->clazz = newContent->clazz;
328         //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
329 #endif
330 #if defined(WITH_JIT_TUNING)
331         gDvmJit.icPatchInit++;
332 #endif
333         COMPILER_TRACE_CHAINING(
334             ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
335                   cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
336     /* Check if this is a frequently missed clazz */
337     } else if (cellAddr->stagedClazz != newContent->clazz) {
338         /* Not proven to be frequent yet - build up the filter cache */
339         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
340 
341         cellAddr->stagedClazz = newContent->clazz;
342 
343         UPDATE_CODE_CACHE_PATCHES();
344         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
345 
346 #if defined(WITH_JIT_TUNING)
347         gDvmJit.icPatchRejected++;
348 #endif
349     /*
350      * Different classes but same method implementation - it is safe to just
351      * patch the class value without the need to stop the world.
352      */
353     } else if (cellAddr->method == newContent->method) {
354         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
355 
356         cellAddr->clazz = newContent->clazz;
357         /* No need to flush the cache here since the branch is not patched */
358         UPDATE_CODE_CACHE_PATCHES();
359 
360         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
361 
362 #if defined(WITH_JIT_TUNING)
363         gDvmJit.icPatchLockFree++;
364 #endif
365     /*
366      * Cannot patch the chaining cell inline - queue it until the next safe
367      * point.
368      */
369     } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE)  {
370         int index = gDvmJit.compilerICPatchIndex++;
371         const ClassObject *clazz = newContent->clazz;
372 
373         gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
374         gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
375         gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
376         gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
377         /* For verification purpose only */
378         gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;
379 
380 #if defined(WITH_JIT_TUNING)
381         gDvmJit.icPatchQueued++;
382 #endif
383         COMPILER_TRACE_CHAINING(
384             ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
385                   cellAddr, newContent->clazz->descriptor, newContent->method->name));
386     } else {
387     /* Queue is full - just drop this patch request */
388 #if defined(WITH_JIT_TUNING)
389         gDvmJit.icPatchDropped++;
390 #endif
391 
392         COMPILER_TRACE_CHAINING(
393             ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
394                   cellAddr, newContent->clazz->descriptor, newContent->method->name));
395     }
396 
397     dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
398     return result;
399 }
400 
401 /*
402  * This method is called from the invoke templates for virtual and interface
403  * methods to speculatively set up a chain to the callee. The templates are
404  * written in assembly and have set up method, cell, and clazz in r0, r2, and
405  * r3 respectively, so there is an unused argument in the list. Upon return
406  * one of the following three results may happen:
407  *   1) The chain is not set up because the callee is native. Reset the
408  *      rechain count to a big number so that it will take a long time before
409  *      the next rechain attempt happens.
410  *   2) The chain is not set up because the callee has not been compiled yet.
411  *      Reset the rechain count to a small number and retry in the near future.
412  *   3) Ask all other threads to stop before patching this chaining cell.
413  *      This is required because another thread may have passed the class check
414  *      but hasn't reached the chaining cell yet to follow the chain. If we
415  *      patch the content before halting the other thread, there is a small
416  *      window in which it may follow the new but wrong chain and invoke a
417  *      different method.
418  */
419 const Method *dvmJitToPatchPredictedChain(const Method *method,
420                                           Thread *self,
421                                           PredictedChainingCell *cell,
422                                           const ClassObject *clazz)
423 {
424     int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
425     /* Don't come back here for a long time if the method is native */
426     if (dvmIsNativeMethod(method)) {
427         UNPROTECT_CODE_CACHE(cell, sizeof(*cell));
428 
429         /*
430          * Put a non-zero/bogus value in the clazz field so that it won't
431          * trigger immediate patching and will continue to fail to match with
432          * a real clazz pointer.
433          */
434         cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;
435 
436         UPDATE_CODE_CACHE_PATCHES();
437         PROTECT_CODE_CACHE(cell, sizeof(*cell));
438         COMPILER_TRACE_CHAINING(
439             ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
440                   cell, method->name));
441         goto done;
442     }
443     {
444     int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);
445 
446     /*
447      * The callee has not been compiled yet. Reset the counter to a small
448      * value and come back to check soon.
449      */
450     if ((tgtAddr == 0) ||
451         ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
452         COMPILER_TRACE_CHAINING(
453             ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
454                   cell, method->clazz->descriptor, method->name));
455         goto done;
456     }
457 
458     PredictedChainingCell newCell;
459 
460     if (cell->clazz == NULL) {
461         newRechainCount = self->icRechainCount;
462     }
463 
464     int relOffset = (int) tgtAddr - (int)cell;
465     OpndSize immSize = estOpndSizeFromImm(relOffset);
466     int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
467     relOffset -= jumpSize;
468     COMPILER_TRACE_CHAINING(
469             ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
470                   cell, method->clazz->descriptor, method->name, jumpSize));
471     //can't use stream here since it is used by the compilation thread
472     dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch
473 
474     newCell.clazz = clazz;
475     newCell.method = method;
476 
477     /*
478      * Add the work order to the queue; the chaining cell will be patched
479      * the next time a safe point is entered.
480      *
481      * If the enqueuing fails, reset the rechain count to a normal value so that
482      * it won't get indefinitely delayed.
483      */
484     inlineCachePatchEnqueue(cell, &newCell);
485     }
486 done:
487     self->icRechainCount = newRechainCount;
488     return method;
489 }
490 
491 /*
492  * Unchain a trace given the starting address of the translation
493  * in the code cache.  Refer to the diagram in dvmCompilerAssembleLIR.
494  * For ARM, it returns the address following the last cell unchained.
495  * For IA, it returns NULL since cacheflush is not required for IA.
496  */
497 u4* dvmJitUnchain(void* codeAddr)
498 {
499     /* codeAddr is 4-byte aligned, so is chain cell count offset */
500     u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
501     u2 chainCellCountOffset = *pChainCellCountOffset;
502     /* chain cell counts information is 4-byte aligned */
503     ChainCellCounts *pChainCellCounts =
504           (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
505     u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
506     u2 chainCellOffset = *pChainCellOffset;
507     u1* pChainCells;
508     int i,j;
509     PredictedChainingCell *predChainCell;
510     int padding;
511 
512     /* Locate the beginning of the chain cell region */
513     pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);
514 
515     /* The cells are sorted in order - walk through them and reset */
516     for (i = 0; i < kChainingCellGap; i++) {
517         /* for hot, normal, singleton chaining:
518                nop  //padding.
519                jmp 0
520                mov imm32, reg1
521                mov imm32, reg2
522                call reg2
523            after chaining:
524                nop
525                jmp imm
526                mov imm32, reg1
527                mov imm32, reg2
528                call reg2
529            after unchaining:
530                nop
531                jmp 0
532                mov imm32, reg1
533                mov imm32, reg2
534                call reg2
535            Space occupied by the chaining cell in bytes: the nop is for padding,
536                 and the 4-byte target of "jmp 0" is 4-byte aligned.
537            Space for predicted chaining: 5 words = 20 bytes
538         */
539         int elemSize = 0;
540         if (i == kChainingCellInvokePredicted) {
541             elemSize = 20;
542         }
543         COMPILER_TRACE_CHAINING(
544             ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));
545 
546         for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
547             switch(i) {
548                 case kChainingCellNormal:
549                 case kChainingCellHot:
550                 case kChainingCellInvokeSingleton:
551                 case kChainingCellBackwardBranch:
552                     COMPILER_TRACE_CHAINING(
553                         ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
554                     pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
555                     elemSize = 4+5+5+2;
556                     memset(pChainCells, 0, 4);
557                     break;
558                 case kChainingCellInvokePredicted:
559                     COMPILER_TRACE_CHAINING(
560                         ALOGI("Jit Runtime: unchaining of predicted"));
561                     /* 4-byte aligned */
562                     padding = (4 - ((u4)pChainCells & 3)) & 3;
563                     pChainCells += padding;
564                     predChainCell = (PredictedChainingCell *) pChainCells;
565                     /*
566                      * Another mutator thread may be racing to use this
567                      * particular predicted cell and may already have passed
568                      * the clazz comparison. So we cannot safely wipe the
569                      * method and branch, but it is safe to clear the clazz,
570                      * which serves as the key.
571                      */
572                     predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
573                     break;
574                 default:
575                     ALOGE("Unexpected chaining type: %d", i);
576                     dvmAbort();  // dvmAbort OK here - can't safely recover
577             }
578             COMPILER_TRACE_CHAINING(
579                 ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
580             pChainCells += elemSize;  /* Advance by a fixed number of bytes */
581         }
582     }
583     return NULL;
584 }
585 
586 /* Unchain all translations in the cache. */
587 void dvmJitUnchainAll()
588 {
589     ALOGV("Jit Runtime: unchaining all");
590     if (gDvmJit.pJitEntryTable != NULL) {
591         COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
592         dvmLockMutex(&gDvmJit.tableLock);
593 
594         UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
595 
596         for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
597             if (gDvmJit.pJitEntryTable[i].dPC &&
598                 !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
599                 gDvmJit.pJitEntryTable[i].codeAddress) {
600                       dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
601             }
602         }
603 
604         PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
605 
606         dvmUnlockMutex(&gDvmJit.tableLock);
607         gDvmJit.translationChains = 0;
608     }
609     gDvmJit.hasNewChain = false;
610 }
611 
612 #define P_GPR_1 PhysicalReg_EBX
613 /* Add an additional jump instruction, keeping the jump target 4-byte aligned. */
614 static void insertJumpHelp()
615 {
616     int rem = (uint)stream % 4;
617     int nop_size = 3 - rem;
618     dump_nop(nop_size);
619     unconditional_jump_int(0, OpndSize_32);
620     return;
621 }
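/*
 * Editorial note (not part of the original source): with rem = stream % 4
 * and nop_size = 3 - rem, the nops advance the write position to an address
 * that is 3 (mod 4). Assuming unconditional_jump_int(0, OpndSize_32) emits a
 * 5-byte "jmp rel32" (one opcode byte plus a 4-byte displacement), the
 * displacement then starts on a 4-byte aligned address, so later chaining
 * can patch the jump target with a single aligned 32-bit store.
 */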
622 
623 /* Chaining cell for code that may need warmup. */
624 /* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
625                  blx r0
626                  data 0xb23a //bytecode address: 0x5115b23a
627                  data 0x5115
628    IA32 assembly:
629                   jmp  0 //5 bytes
630                   movl address, %ebx
631                   movl dvmJitToInterpNormal, %eax
632                   call %eax
633                   <-- return address
634 */
635 static void handleNormalChainingCell(CompilationUnit *cUnit,
636                                      unsigned int offset, int blockId, LowOpBlockLabel* labelList)
637 {
638     ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
639           cUnit->method->name, blockId, offset, stream - streamMethodStart);
640     if(dump_x86_inst)
641         ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
642               offset, stream - streamMethodStart, stream);
643     /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
644      * resolve the multithreading issue.
645      */
646     insertJumpHelp();
647     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
648     scratchRegs[0] = PhysicalReg_EAX;
649     call_dvmJitToInterpNormal();
650     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
651 }
652 
653 /*
654  * Chaining cell for instructions that immediately follow already translated
655  * code.
656  */
657 static void handleHotChainingCell(CompilationUnit *cUnit,
658                                   unsigned int offset, int blockId, LowOpBlockLabel* labelList)
659 {
660     ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
661           cUnit->method->name, blockId, offset, stream - streamMethodStart);
662     if(dump_x86_inst)
663         ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
664               offset, stream - streamMethodStart, stream);
665     /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
666      * resolve the multithreading issue.
667      */
668     insertJumpHelp();
669     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
670     scratchRegs[0] = PhysicalReg_EAX;
671     call_dvmJitToInterpTraceSelect();
672     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
673 }
674 
675 /* Chaining cell for branches that branch back into the same basic block */
676 static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
677                                      unsigned int offset, int blockId, LowOpBlockLabel* labelList)
678 {
679     ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
680           cUnit->method->name, blockId, offset, stream - streamMethodStart);
681     if(dump_x86_inst)
682         ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
683               offset, stream - streamMethodStart, stream);
684     /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
685      * resolve the multithreading issue.
686      */
687     insertJumpHelp();
688     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
689     scratchRegs[0] = PhysicalReg_EAX;
690     call_dvmJitToInterpNormal();
691     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
692 }
693 
694 /* Chaining cell for monomorphic method invocations. */
695 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
696                                               const Method *callee, int blockId, LowOpBlockLabel* labelList)
697 {
698     ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
699           cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
700     if(dump_x86_inst)
701         ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
702               blockId, stream - streamMethodStart, stream);
703     /* Add one additional "jump 0" instruction; it may be modified during JIT chaining. This helps
704      * resolve the multithreading issue.
705      */
706     insertJumpHelp();
707     move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
708     scratchRegs[0] = PhysicalReg_EAX;
709     call_dvmJitToInterpTraceSelect();
710     //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
711 }
712 #undef P_GPR_1
713 
714 /* Chaining cell for predicted (virtual/interface) method invocations. */
715 static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
716 {
717     if(dump_x86_inst)
718         ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
719               blockId, stream - streamMethodStart, stream);
720 #ifndef PREDICTED_CHAINING
721     //assume rPC for callee->insns in %ebx
722     scratchRegs[0] = PhysicalReg_EAX;
723 #if defined(WITH_JIT_TUNING)
724     /* Predicted chaining is not enabled. Fall back to interpreter and
725      * indicate that predicted chaining was not done.
726      */
727     move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
728 #endif
729     call_dvmJitToInterpTraceSelectNoChain();
730 #else
731     /* make sure the section for the predicted chaining cell is 4-byte aligned */
732     //int padding = (4 - ((u4)stream & 3)) & 3;
733     //stream += padding;
734     int* streamData = (int*)stream;
735     /* Should not be executed in the initial state */
736     streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
737     streamData[1] = 0;
738     /* To be filled: class */
739     streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
740     /* To be filled: method */
741     streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
742     /*
743      * Rechain count. The initial value of 0 here will trigger chaining upon
744      * the first invocation of this callsite.
745      */
746     streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
747 #if 0
748     ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
749           *((int*)(stream+8)), *((int*)(stream+12)));
750 #endif
751     stream += 20; //5 *4
752 #endif
753 }
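/*
 * Editorial note (not part of the original source): the five 4-byte words
 * emitted above (branch-pair init, 0, clazz init, method init, rechain
 * counter) form the 20-byte predicted chaining cell, matching the
 * "5 words = 20 bytes" figure and the elemSize of 20 used for
 * kChainingCellInvokePredicted in dvmJitUnchain() earlier in this file.
 */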
754 
755 /* Load the Dalvik PC into r0 and jump to the specified target */
756 static void handlePCReconstruction(CompilationUnit *cUnit,
757                                    LowOpBlockLabel *targetLabel)
758 {
759 #if 0
760     LowOp **pcrLabel =
761         (LowOp **) cUnit->pcReconstructionList.elemList;
762     int numElems = cUnit->pcReconstructionList.numUsed;
763     int i;
764     for (i = 0; i < numElems; i++) {
765         dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
766         /* r0 = dalvik PC */
767         loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
768         genUnconditionalBranch(cUnit, targetLabel);
769     }
770 #endif
771 }
772 
773 //use O0 code generator for hoisted checks outside of the loop
774 /*
775  * vA = arrayReg;
776  * vB = idxReg;
777  * vC = endConditionReg;
778  * arg[0] = maxC
779  * arg[1] = minC
780  * arg[2] = loopBranchConditionCode
781  */
782 #define P_GPR_1 PhysicalReg_EBX
783 #define P_GPR_2 PhysicalReg_ECX
784 static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
785 {
786     /*
787      * NOTE: these synthesized blocks don't have ssa names assigned
788      * for Dalvik registers.  However, because they dominate the following
789      * blocks we can simply use the Dalvik name w/ subscript 0 as the
790      * ssa name.
791      */
792     DecodedInstruction *dInsn = &mir->dalvikInsn;
793     const int maxC = dInsn->arg[0];
794 
795     /* assign array in virtual register to P_GPR_1 */
796     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
797     /* assign index in virtual register to P_GPR_2 */
798     get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
799     export_pc();
800     compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
801     condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
802     int delta = maxC;
803     /*
804      * If the loop end condition is ">=" instead of ">", then the largest value
805      * of the index is "endCondition - 1".
806      */
807     if (dInsn->arg[2] == OP_IF_GE) {
808         delta--;
809     }
810 
811     if (delta < 0) { //+delta
812         //if P_GPR_2 is mapped to a VR, we can't do this
813         alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
814     } else if(delta > 0) {
815         alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
816     }
817     compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
818     condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
819 }
820 
821 /*
822  * vA = arrayReg;
823  * vB = idxReg;
824  * vC = endConditionReg;
825  * arg[0] = maxC
826  * arg[1] = minC
827  * arg[2] = loopBranchConditionCode
828  */
829 static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
830 {
831     DecodedInstruction *dInsn = &mir->dalvikInsn;
832     const int maxC = dInsn->arg[0];
833 
834     /* assign array in virtual register to P_GPR_1 */
835     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
836     /* assign index in virtual register to P_GPR_2 */
837     get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
838     export_pc();
839     compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
840     condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
841 
842     if (maxC < 0) {
843         //if P_GPR_2 is mapped to a VR, we can't do this
844         alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
845     } else if(maxC > 0) {
846         alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
847     }
848     compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
849     condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
850 
851 }
852 #undef P_GPR_1
853 #undef P_GPR_2
854 
855 /*
856  * vA = idxReg;
857  * vB = minC;
858  */
859 #define P_GPR_1 PhysicalReg_ECX
860 static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
861 {
862     DecodedInstruction *dInsn = &mir->dalvikInsn;
863     const int minC = dInsn->vB;
864     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //array
865     export_pc();
866     compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
867     condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
868 }
869 #undef P_GPR_1
870 
871 #ifdef WITH_JIT_INLINING
872 static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
873 {
874     CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
875     if(gDvm.executionMode == kExecutionModeNcgO0) {
876         get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
877         move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
878         compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
879         export_pc(); //use %edx
880         conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
881         move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
882         compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
883     } else {
884         get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
885         move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
886         nullCheck(5, false, 1, mir->dalvikInsn.vC);
887         move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
888         compare_reg_reg(4, false, 6, false);
889     }
890 
891     //immediate will be updated later in genLandingPadForMispredictedCallee
892     streamMisPred = stream;
893     callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
894 }
895 #endif
896 
897 /* Extended MIR instructions like PHI */
898 void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
899 {
900     ExecutionMode origMode = gDvm.executionMode;
901     gDvm.executionMode = kExecutionModeNcgO0;
902     switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
903         case kMirOpPhi: {
904             break;
905         }
906         case kMirOpNullNRangeUpCheck: {
907             genHoistedChecksForCountUpLoop(cUnit, mir);
908             break;
909         }
910         case kMirOpNullNRangeDownCheck: {
911             genHoistedChecksForCountDownLoop(cUnit, mir);
912             break;
913         }
914         case kMirOpLowerBound: {
915             genHoistedLowerBoundCheck(cUnit, mir);
916             break;
917         }
918         case kMirOpPunt: {
919             break;
920         }
921 #ifdef WITH_JIT_INLINING
922         case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
923             genValidationForPredictedInline(cUnit, mir);
924             break;
925         }
926 #endif
927         default:
928             break;
929     }
930     gDvm.executionMode = origMode;
931 }
932 
933 static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
934                                 int bodyId)
935 {
936     /*
937      * Next, create two branches - one branch over to the loop body and the
938      * other branch to the PCR cell to punt.
939      */
940     //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
941     //setupResourceMasks(branchToBody);
942     //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);
943 
944 #if 0
945     LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
946     branchToPCR->opCode = kThumbBUncond;
947     branchToPCR->generic.target = (LIR *) pcrLabel;
948     setupResourceMasks(branchToPCR);
949     cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
950 #endif
951 }
952 
953 /* check whether we can merge a block ending with an unconditional goto into its taken target block */
954 bool mergeBlock(BasicBlock *bb) {
955     if(bb->blockType == kDalvikByteCode &&
956        bb->firstMIRInsn != NULL &&
957        (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
958         bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
959         bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
960        bb->fallThrough == NULL) {// &&
961        //cUnit->hasLoop) {
962         //ALOGI("merge blocks ending with goto at index %d", i);
963         MIR* prevInsn = bb->lastMIRInsn->prev;
964         if(bb->taken == NULL) return false;
965         MIR* mergeInsn = bb->taken->firstMIRInsn;
966         if(mergeInsn == NULL) return false;
967         if(prevInsn == NULL) {//the block has a single instruction
968             bb->firstMIRInsn = mergeInsn;
969         } else {
970             prevInsn->next = mergeInsn; //remove goto from the chain
971         }
972         mergeInsn->prev = prevInsn;
973         bb->lastMIRInsn = bb->taken->lastMIRInsn;
974         bb->taken->firstMIRInsn = NULL; //block being merged in
975         bb->fallThrough = bb->taken->fallThrough;
976         bb->taken = bb->taken->taken;
977         return true;
978     }
979     return false;
980 }
981 
982 static int genTraceProfileEntry(CompilationUnit *cUnit)
983 {
984     cUnit->headerSize = 6;
985     if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
986         (gDvmJit.profileMode == kTraceProfilingDisabled)) {
987         return 12;
988     } else {
989         return 4;
990     }
991 
992 }
993 
994 #define PRINT_BUFFER_LEN 1024
995 /* Print the code block in code cache in the range of [startAddr, endAddr)
996  * in readable format.
997  */
998 void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
999 {
1000     char strbuf[PRINT_BUFFER_LEN];
1001     unsigned char *addr;
1002     unsigned char *next_addr;
1003     int n;
1004 
1005     if (gDvmJit.printBinary) {
1006         // print binary in bytes
1007         n = 0;
1008         for (addr = startAddr; addr < endAddr; addr++) {
1009             n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
1010             if (n > PRINT_BUFFER_LEN - 10) {
1011                 ALOGD("## %s", strbuf);
1012                 n = 0;
1013             }
1014         }
1015         if (n > 0)
1016             ALOGD("## %s", strbuf);
1017     }
1018 
1019     // print disassembled instructions
1020     addr = startAddr;
1021     while (addr < endAddr) {
1022         next_addr = reinterpret_cast<unsigned char*>
1023             (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
1024                                        strbuf, PRINT_BUFFER_LEN));
1025         if (addr != next_addr) {
1026             ALOGD("**  %p: %s", addr, strbuf);
1027         } else {                // check whether this is nop padding
1028             if (addr[0] == 0x90) {
1029                 ALOGD("**  %p: NOP (1 byte)", addr);
1030                 next_addr += 1;
1031             } else if (addr[0] == 0x66 && addr[1] == 0x90) {
1032                 ALOGD("**  %p: NOP (2 bytes)", addr);
1033                 next_addr += 2;
1034             } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
1035                 ALOGD("**  %p: NOP (3 bytes)", addr);
1036                 next_addr += 3;
1037             } else {
1038                 ALOGD("** unable to decode binary at %p", addr);
1039                 break;
1040             }
1041         }
1042         addr = next_addr;
1043     }
1044 }
1045 
1046 /* 4 is the number of additional bytes needed for a trace's chaining information:
1047  * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */
1048 #define EXTRA_BYTES_FOR_CHAINING 4
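/*
 * Editorial note (not part of the original source): dvmJitUnchain() above
 * reads a 2-byte chaining cell count offset at codeAddr - 4 and a 2-byte
 * chaining cell offset at codeAddr - 2; the 4 extra bytes reserved below
 * (before the trace is aligned to 16 bytes) appear to be where that header
 * is placed.
 */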
1049 
1050 /* Entry function to invoke the backend of the JIT compiler */
1051 void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
1052 {
1053     dump_x86_inst = cUnit->printMe;
1054     /* Used to hold the labels of each block */
1055     LowOpBlockLabel *labelList =
1056         (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
1057     LowOp *headLIR = NULL;
1058     GrowableList chainingListByType[kChainingCellLast];
1059     unsigned int i, padding;
1060 
1061     /*
1062      * Initialize the chaining lists for the various cell types.
1063      */
1064     for (i = 0; i < kChainingCellLast; i++) {
1065         dvmInitGrowableList(&chainingListByType[i], 2);
1066     }
1067 
1068     /* Clear the visited flag for each block */
1069     dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
1070                                           kAllNodes, false /* isIterative */);
1071 
1072     GrowableListIterator iterator;
1073     dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
1074 
1075     /* Traces start with a profiling entry point.  Generate it here */
1076     cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
1077 
1078     //BasicBlock **blockList = cUnit->blockList;
1079     GrowableList *blockList = &cUnit->blockList;
1080     BasicBlock *bb;
1081 
1082     info->codeAddress = NULL;
1083     stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
1084 
1085     // TODO: compile into a temporary buffer and then copy into the code cache.
1086     // That would let us leave the code cache unprotected for a shorter time.
1087     size_t unprotected_code_cache_bytes =
1088             gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
1089     UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1090 
1091     streamStart = stream; /* trace start before alignment */
1092     stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
1093     stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
1094     streamMethodStart = stream; /* code start */
1095     for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
1096         labelList[i].lop.generic.offset = -1;
1097     }
1098     cUnit->exceptionBlockId = -1;
1099     for (i = 0; i < blockList->numUsed; i++) {
1100         bb = (BasicBlock *) blockList->elemList[i];
1101         if(bb->blockType == kExceptionHandling)
1102             cUnit->exceptionBlockId = i;
1103     }
1104     startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
1105     if(gDvm.executionMode == kExecutionModeNcgO1) {
1106         //merge blocks ending with "goto" with the fall through block
1107         if (cUnit->jitMode != kJitLoop)
1108             for (i = 0; i < blockList->numUsed; i++) {
1109                 bb = (BasicBlock *) blockList->elemList[i];
1110                 bool merged = mergeBlock(bb);
1111                 while(merged) merged = mergeBlock(bb);
1112             }
1113         for (i = 0; i < blockList->numUsed; i++) {
1114             bb = (BasicBlock *) blockList->elemList[i];
1115             if(bb->blockType == kDalvikByteCode &&
1116                bb->firstMIRInsn != NULL) {
1117                 preprocessingBB(bb);
1118             }
1119         }
1120         preprocessingTrace();
1121     }
1122 
1123     /* Handle the content in each basic block */
1124     for (i = 0; ; i++) {
1125         MIR *mir;
1126         bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
1127         if (bb == NULL) break;
1128         if (bb->visited == true) continue;
1129 
1130         labelList[i].immOpnd.value = bb->startOffset;
1131 
1132         if (bb->blockType >= kChainingCellLast) {
1133             /*
1134              * Append the label pseudo LIR first. Chaining cells will be handled
1135              * separately afterwards.
1136              */
1137             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
1138         }
1139 
1140         if (bb->blockType == kEntryBlock) {
1141             labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
1142             if (bb->firstMIRInsn == NULL) {
1143                 continue;
1144             } else {
1145               setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
1146                                   //&labelList[blockList[i]->fallThrough->id]);
1147             }
1148         } else if (bb->blockType == kExitBlock) {
1149             labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
1150             labelList[i].lop.generic.offset = (stream - streamMethodStart);
1151             goto gen_fallthrough;
1152         } else if (bb->blockType == kDalvikByteCode) {
1153             if (bb->hidden == true) continue;
1154             labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
1155             /* Reset the register state */
1156 #if 0
1157             resetRegisterScoreboard(cUnit);
1158 #endif
1159         } else {
1160             switch (bb->blockType) {
1161                 case kChainingCellNormal:
1162                     labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
1163                     /* handle the codegen later */
1164                     dvmInsertGrowableList(
1165                         &chainingListByType[kChainingCellNormal], i);
1166                     break;
1167                 case kChainingCellInvokeSingleton:
1168                     labelList[i].lop.opCode2 =
1169                         ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
1170                     labelList[i].immOpnd.value =
1171                         (int) bb->containingMethod;
1172                     /* handle the codegen later */
1173                     dvmInsertGrowableList(
1174                         &chainingListByType[kChainingCellInvokeSingleton], i);
1175                     break;
1176                 case kChainingCellInvokePredicted:
1177                     labelList[i].lop.opCode2 =
1178                         ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
1179                    /*
1180                      * Move the cached method pointer from operand 1 to 0.
1181                      * Operand 0 was clobbered earlier in this routine to store
1182                      * the block starting offset, which is not applicable to
1183                      * predicted chaining cell.
1184                      */
1185                     //TODO
1186                     //labelList[i].operands[0] = labelList[i].operands[1];
1187 
1188                     /* handle the codegen later */
1189                     dvmInsertGrowableList(
1190                         &chainingListByType[kChainingCellInvokePredicted], i);
1191                     break;
1192                 case kChainingCellHot:
1193                     labelList[i].lop.opCode2 =
1194                         ATOM_PSEUDO_CHAINING_CELL_HOT;
1195                     /* handle the codegen later */
1196                     dvmInsertGrowableList(
1197                         &chainingListByType[kChainingCellHot], i);
1198                     break;
1199                 case kPCReconstruction:
1200                     /* Make sure exception handling block is next */
1201                     labelList[i].lop.opCode2 =
1202                         ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
1203                     //assert (i == cUnit->numBlocks - 2);
1204                     labelList[i].lop.generic.offset = (stream - streamMethodStart);
1205                     handlePCReconstruction(cUnit,
1206                                            &labelList[cUnit->puntBlock->id]);
1207                     break;
1208                 case kExceptionHandling:
1209                     labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
1210                     labelList[i].lop.generic.offset = (stream - streamMethodStart);
1211                     //if (cUnit->pcReconstructionList.numUsed) {
1212                         scratchRegs[0] = PhysicalReg_EAX;
1213                         jumpToInterpPunt();
1214                         //call_dvmJitToInterpPunt();
1215                     //}
1216                     break;
1217                 case kChainingCellBackwardBranch:
1218                     labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
1219                     /* handle the codegen later */
1220                     dvmInsertGrowableList(
1221                         &chainingListByType[kChainingCellBackwardBranch],
1222                         i);
1223                     break;
1224                 default:
1225                     break;
1226             }
1227             continue;
1228         }
1229         {
1230         //LowOp *headLIR = NULL;
1231         const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
1232         const u2 *startCodePtr = dexCode->insns;
1233         const u2 *codePtr;
1234         labelList[i].lop.generic.offset = (stream - streamMethodStart);
1235         ALOGV("get ready to handle JIT bb %d type %d hidden %d",
1236               bb->id, bb->blockType, bb->hidden);
1237         for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
1238             bb = nextBB;
1239             bb->visited = true;
1240             cUnit->nextCodegenBlock = NULL;
1241 
1242         if(gDvm.executionMode == kExecutionModeNcgO1 &&
1243            bb->blockType != kEntryBlock &&
1244            bb->firstMIRInsn != NULL) {
1245             startOfBasicBlock(bb);
1246             int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
1247             endOfBasicBlock(bb);
1248             if(cg_ret < 0) {
1249                 endOfTrace(true/*freeOnly*/);
1250                 cUnit->baseAddr = NULL;
1251                 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1252                 return;
1253             }
1254         } else {
1255         for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
1256             startOfBasicBlock(bb); //why here for O0
1257             Opcode dalvikOpCode = mir->dalvikInsn.opcode;
1258             if((int)dalvikOpCode >= (int)kMirOpFirst) {
1259                 handleExtendedMIR(cUnit, mir);
1260                 continue;
1261             }
1262             InstructionFormat dalvikFormat =
1263                 dexGetFormatFromOpcode(dalvikOpCode);
1264             ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
1265                   mir->offset, dalvikOpCode, dalvikFormat);
1266             LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
1267             /* Remember the first LIR for this block */
1268             if (headLIR == NULL) {
1269                 headLIR = (LowOp*)boundaryLIR;
1270             }
1271             bool notHandled = true;
1272             /*
1273              * Debugging: screen the opcode first to see if it is in the
1274              * do[-not]-compile list
1275              */
1276             bool singleStepMe =
1277                 gDvmJit.includeSelectedOp !=
1278                 ((gDvmJit.opList[dalvikOpCode >> 3] &
1279                   (1 << (dalvikOpCode & 0x7))) !=
1280                  0);
1281             if (singleStepMe || cUnit->allSingleStep) {
1282             } else {
1283                 codePtr = startCodePtr + mir->offset;
1284                 //lower each byte code, update LIR
1285                 notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
1286                 if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
1287                    CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1288                     ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
1289                     gDvmJit.codeCacheFull = true;
1290                     cUnit->baseAddr = NULL;
1291                     endOfTrace(true/*freeOnly*/);
1292                     PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1293                     return;
1294                 }
1295             }
1296             if (notHandled) {
1297                 ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
1298                      mir->offset,
1299                      dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
1300                      dalvikFormat);
1301                 dvmAbort();
1302                 break;
1303             }
1304         } // end for
1305         } // end else //JIT + O0 code generator
1306         }
1307         } // end for
1308         /* Eliminate redundant loads/stores and delay stores into later slots */
1309 #if 0
1310         dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
1311                                            cUnit->lastLIRInsn);
1312 #endif
1313         if (headLIR) headLIR = NULL;
1314 gen_fallthrough:
1315         /*
1316          * If the block was terminated because of the trace length constraint,
1317          * insert an unconditional branch to the chaining cell.
1318          */
1319         if (bb->needFallThroughBranch) {
1320             jumpToBasicBlock(stream, bb->fallThrough->id);
1321         }
1322 
1323     }
1324 
1325     char* streamChainingStart = (char*)stream;
1326     /* Handle the chaining cells in predefined order */
1327     for (i = 0; i < kChainingCellGap; i++) {
1328         size_t j;
1329         int *blockIdList = (int *) chainingListByType[i].elemList;
1330 
1331         cUnit->numChainingCells[i] = chainingListByType[i].numUsed;
1332 
1333         /* No chaining cells of this type */
1334         if (cUnit->numChainingCells[i] == 0)
1335             continue;
1336 
1337         /* Record the first LIR for a new type of chaining cell */
1338         cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
1339         for (j = 0; j < chainingListByType[i].numUsed; j++) {
1340             int blockId = blockIdList[j];
1341             BasicBlock *chainingBlock =
1342                 (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
1343                                                          blockId);
1344 
1345             labelList[blockId].lop.generic.offset = (stream - streamMethodStart);
1346 
1347             /* Align this chaining cell first */
1348 #if 0
1349             newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
1350 #endif
1351             /* Insert the pseudo chaining instruction */
1352             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
1353 
1354 
1355             switch (chainingBlock->blockType) {
1356                 case kChainingCellNormal:
1357                     handleNormalChainingCell(cUnit,
1358                      chainingBlock->startOffset, blockId, labelList);
1359                     break;
1360                 case kChainingCellInvokeSingleton:
1361                     handleInvokeSingletonChainingCell(cUnit,
1362                         chainingBlock->containingMethod, blockId, labelList);
1363                     break;
1364                 case kChainingCellInvokePredicted:
1365                     handleInvokePredictedChainingCell(cUnit, blockId);
1366                     break;
1367                 case kChainingCellHot:
1368                     handleHotChainingCell(cUnit,
1369                         chainingBlock->startOffset, blockId, labelList);
1370                     break;
1371                 case kChainingCellBackwardBranch:
1372                     handleBackwardBranchChainingCell(cUnit,
1373                         chainingBlock->startOffset, blockId, labelList);
1374                     break;
1375                 default:
1376                     ALOGE("Bad blocktype %d", chainingBlock->blockType);
1377                     dvmAbort();
1378                     break;
1379             }
1380 
1381             if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1382                 ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
1383                 gDvmJit.codeCacheFull = true;
1384                 cUnit->baseAddr = NULL;
1385                 endOfTrace(true); /* need to free structures */
1386                 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1387                 return;
1388             }
1389         }
1390     }
1391 #if 0
1392     dvmCompilerApplyGlobalOptimizations(cUnit);
1393 #endif
1394     endOfTrace(false);
1395 
1396     if (gDvmJit.codeCacheFull) {
1397         /* We hit the code cache size limit inside endOfTrace(false).
1398          * Bail out for this trace.
1399          */
1400         ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
1401         cUnit->baseAddr = NULL;
1402         PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1403         return;
1404     }
1405 
1406     /* Dump the chain cell counts section; make sure it is 4-byte aligned */
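    /*
     * (4 - (addr & 3)) & 3 yields the 0..3 padding bytes needed to advance
     * stream to the next 4-byte boundary (0 when it is already aligned).
     */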
1407     padding = (4 - ((u4)stream & 3)) & 3;
1408     stream += padding;
1409     ChainCellCounts chainCellCounts;
1410     /* Install the chaining cell counts */
1411     for (i=0; i< kChainingCellGap; i++) {
1412         chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
1413     }
1414     char* streamCountStart = (char*)stream;
1415     memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
1416     stream += sizeof(chainCellCounts);
1417 
1418     cUnit->baseAddr = streamMethodStart;
1419     cUnit->totalSize = (stream - streamStart);
1420     if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1421         ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
1422         gDvmJit.codeCacheFull = true;
1423         cUnit->baseAddr = NULL;
1424         PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1425         return;
1426     }
1427 
1428     /* write chaining cell count offset & chaining cell offset */
1429     u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
1430     *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
1431     pOffset[1] = streamChainingStart - streamMethodStart;
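    /*
     * The two u2 slots reserved just below the trace's code address now hold,
     * in order, the offset of the chain cell counts section and the offset of
     * the first chaining cell, both relative to streamMethodStart.
     */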
1432 
1433     PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1434 
1435     gDvmJit.codeCacheByteUsed += (stream - streamStart);
1436     if (cUnit->printMe) {
1437         unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
1438         unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
1439         ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
1440               cUnit->method->clazz->descriptor, cUnit->method->name,
1441               codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
1442         ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
1443               cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
1444         printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
1445     }
1446     ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
1447           (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
1448           cUnit->totalSize, gDvmJit.codeCache);
1449 
1450     gDvmJit.numCompilations++;
1451 
1452     info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
1453 }
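/*
 * Illustrative sketch only (the helper below is hypothetical and not part of
 * the runtime): given the codeAddress of an installed trace, the two u2
 * values written at codeAddress - EXTRA_BYTES_FOR_CHAINING locate the chain
 * cell counts section and the first chaining cell emitted above.
 */
#if 0   /* documentation-only sketch */
static inline void sketchLocateChainSections(const char *codeAddress,
                                             const char **countsOut,
                                             const char **cellsOut)
{
    const u2 *header = (const u2 *) (codeAddress - EXTRA_BYTES_FOR_CHAINING);
    *countsOut = codeAddress + header[0];   /* chain cell counts section */
    *cellsOut  = codeAddress + header[1];   /* first chaining cell */
}
#endif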
1454 
1455 /*
1456  * Perform translation chain operation.
1457  */
1458 void* dvmJitChain(void* tgtAddr, u4* branchAddr)
1459 {
1460 #ifdef JIT_CHAIN
1461     int relOffset = (int) tgtAddr - (int)branchAddr;
1462 
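    /* Chain only while the profiling table is still installed, no thread
     * suspension is pending, and the code cache is not full. */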
1463     if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
1464         (gDvmJit.codeCacheFull == false)) {
1465 
1466         gDvmJit.translationChains++;
1467 
1468         //OpndSize immSize = estOpndSizeFromImm(relOffset);
1469         //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
1470         /* The jump operand size is hard-coded to 32 bits.  This instruction
1471          * replaces the "jump 0" placeholder in the original code sequence.
1472          */
1473         OpndSize immSize = OpndSize_32;
1474         relOffset -= 5;
1475         //can't use stream here since it is used by the compilation thread
1476         UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1477         dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
1478         PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1479 
1480         gDvmJit.hasNewChain = true;
1481 
1482         COMPILER_TRACE_CHAINING(
1483             ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
1484                   (int) branchAddr, tgtAddr, relOffset));
1485     }
1486 #endif
1487     return tgtAddr;
1488 }
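/*
 * Illustrative sketch only (hypothetical helper, not used above): a JMP rel32
 * on IA32 is 5 bytes long and its displacement is relative to the address of
 * the next instruction, i.e. branchAddr + 5.  That is why dvmJitChain
 * subtracts 5 from relOffset before emitting the jump.
 */
#if 0   /* documentation-only sketch */
static inline int sketchJmpRel32Displacement(const char *branchAddr,
                                             const char *tgtAddr)
{
    return (int) (tgtAddr - (branchAddr + 5));
}
#endif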
1489 
1490 /*
1491  * Accept the work and start compiling.  Returns true if compilation
1492  * is attempted.
1493  */
1494 bool dvmCompilerDoWork(CompilerWorkOrder *work)
1495 {
1496     JitTraceDescription *desc;
1497     bool isCompile;
1498     bool success = true;
1499 
1500     if (gDvmJit.codeCacheFull) {
1501         return false;
1502     }
1503 
1504     switch (work->kind) {
1505         case kWorkOrderTrace:
1506             isCompile = true;
1507             /* Start compilation with maximally allowed trace length */
1508             desc = (JitTraceDescription *)work->info;
1509             success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1510                                         work->bailPtr, 0 /* no hints */);
1511             break;
1512         case kWorkOrderTraceDebug: {
1513             bool oldPrintMe = gDvmJit.printMe;
1514             gDvmJit.printMe = true;
1515             isCompile = true;
1516             /* Start compilation with maximally allowed trace length */
1517             desc = (JitTraceDescription *)work->info;
1518             success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1519                                         work->bailPtr, 0 /* no hints */);
1520             gDvmJit.printMe = oldPrintMe;
1521             break;
1522         }
1523         case kWorkOrderProfileMode:
1524             dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
1525             isCompile = false;
1526             break;
1527         default:
1528             isCompile = false;
1529             ALOGE("Jit: unknown work order type");
1530             assert(0);  // Bail if debug build, discard otherwise
1531     }
1532     if (!success)
1533         work->result.codeAddress = NULL;
1534     return isCompile;
1535 }
1536 
1537 void dvmCompilerCacheFlush(long start, long end, long flags) {
1538   /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
1539 }
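/*
 * Illustrative sketch only: on a target without a coherent instruction cache
 * (e.g. ARM) this hook would need to flush [start, end), for example via the
 * GCC/Clang builtin below.  The IA32 build deliberately leaves it empty.
 */
#if 0   /* documentation-only sketch */
static void sketchCacheFlushNonCoherent(long start, long end, long flags)
{
    (void) flags;
    __builtin___clear_cache((char *) start, (char *) end);
}
#endif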
1540 
1541 //#endif
1542