1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <sys/mman.h>
17 #include "Dalvik.h"
18 #include "libdex/DexOpcodes.h"
19 #include "compiler/Compiler.h"
20 #include "compiler/CompilerIR.h"
21 #include "interp/Jit.h"
22 #include "libdex/DexFile.h"
23 #include "Lower.h"
24 #include "NcgAot.h"
25 #include "compiler/codegen/CompilerCodegen.h"
26 
27 /* Init values when a predicted chain is initially assembled */
28 /* E7FE is branch to self */
29 #define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe
30 
31 /* Target-specific save/restore */
32 extern "C" void dvmJitCalleeSave(double *saveArea);
33 extern "C" void dvmJitCalleeRestore(double *saveArea);
34 
35 /*
36  * Determine the initial instruction set to be used for this trace.
37  * Later components may decide to change this.
38  */
39 //JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
40 JitInstructionSetType dvmCompilerInstructionSet(void)
41 {
42     return DALVIK_JIT_IA32;
43 }
44 
45 JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
46 {
47     return DALVIK_JIT_IA32;
48 }
49 
50 /* we don't use a template for IA32 */
51 void *dvmCompilerGetInterpretTemplate()
52 {
53       return NULL;
54 }
55 
56 /* Track the number of times that the code cache is patched */
57 #if defined(WITH_JIT_TUNING)
58 #define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
59 #else
60 #define UPDATE_CODE_CACHE_PATCHES()
61 #endif
62 
63 bool dvmCompilerArchInit() {
64     /* Target-specific configuration */
65     gDvmJit.jitTableSize = 1 << 12;
66     gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
67     gDvmJit.threshold = 255;
68     gDvmJit.codeCacheSize = 512*1024;
69     gDvmJit.optLevel = kJitOptLevelO1;
70 
71 #if defined(WITH_SELF_VERIFICATION)
72     /* Force into blocking mode */
73     gDvmJit.blockingMode = true;
74     gDvm.nativeDebuggerActive = true;
75 #endif
76 
77     // Make sure all threads have current values
78     dvmJitUpdateThreadStateAll();
79 
80     return true;
81 }
82 
83 void dvmCompilerPatchInlineCache(void)
84 {
85     int i;
86     PredictedChainingCell *minAddr, *maxAddr;
87 
88     /* Nothing to be done */
89     if (gDvmJit.compilerICPatchIndex == 0) return;
90 
91     /*
92      * Since all threads are already stopped we don't really need to acquire
93      * the lock. But a race condition could easily be introduced in the future
94      * without careful attention, so we still acquire the lock here.
95      */
96     dvmLockMutex(&gDvmJit.compilerICPatchLock);
97 
98     UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
99 
100     //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);
101 
102     /* Initialize the min/max address range */
103     minAddr = (PredictedChainingCell *)
104         ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
105     maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;
106 
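    /*
     * The loop below shrinks minAddr and grows maxAddr so that on exit they
     * bracket the range of cells actually patched. In this IA32 version the
     * range is not consumed afterwards (no explicit instruction-cache flush
     * is needed on IA32); it is presumably kept for parity with targets that
     * do need to flush that range.
     */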
107     for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
108         ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
109         PredictedChainingCell *cellAddr = workOrder->cellAddr;
110         PredictedChainingCell *cellContent = &workOrder->cellContent;
111         ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
112                                                 workOrder->classLoader);
113 
114         assert(clazz->serialNumber == workOrder->serialNumber);
115 
116         /* Use the newly resolved clazz pointer */
117         cellContent->clazz = clazz;
118 
119         if (cellAddr->clazz == NULL) {
120             COMPILER_TRACE_CHAINING(
121                 ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
122                       cellAddr,
123                       cellContent->clazz->descriptor,
124                       cellContent->method->name));
125         } else {
126             COMPILER_TRACE_CHAINING(
127                 ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
128                       "patched",
129                       cellAddr,
130                       cellAddr->clazz->descriptor,
131                       cellContent->clazz->descriptor,
132                       cellContent->method->name));
133         }
134 
135         /* Patch the chaining cell */
136         *cellAddr = *cellContent;
137         minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
138         maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
139     }
140 
141     PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
142 
143     gDvmJit.compilerICPatchIndex = 0;
144     dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
145 }
146 
147 /* Target-specific cache clearing */
148 void dvmCompilerCacheClear(char *start, size_t size)
149 {
150     /* "0xFF 0xFF" is an invalid opcode for x86. */
151     memset(start, 0xFF, size);
152 }
153 
154 /* for JIT debugging, to be implemented */
155 void dvmJitCalleeSave(double *saveArea) {
156 }
157 
158 void dvmJitCalleeRestore(double *saveArea) {
159 }
160 
161 void dvmJitToInterpSingleStep() {
162 }
163 
164 JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
165                                             const JitEntry *knownEntry) {
166     return NULL;
167 }
168 
169 void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
170 {
171 }
172 
173 void dvmCompilerArchDump(void)
174 {
175 }
176 
177 char *getTraceBase(const JitEntry *p)
178 {
179     return NULL;
180 }
181 
182 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
183 {
184 }
185 
186 void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
187 {
188 }
189 
190 void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
191 {
192     // Method-based JIT not supported for x86.
193 }
194 
195 void dvmJitScanAllClassPointers(void (*callback)(void *))
196 {
197 }
198 
199 /* Handy function to retrieve the profile count */
200 static inline int getProfileCount(const JitEntry *entry)
201 {
202     if (entry->dPC == 0 || entry->codeAddress == 0)
203         return 0;
204     u4 *pExecutionCount = (u4 *) getTraceBase(entry);
205 
206     return pExecutionCount ? *pExecutionCount : 0;
207 }
208 
209 /* qsort callback function */
210 static int sortTraceProfileCount(const void *entry1, const void *entry2)
211 {
212     const JitEntry *jitEntry1 = (const JitEntry *)entry1;
213     const JitEntry *jitEntry2 = (const JitEntry *)entry2;
214 
215     JitTraceCounter_t count1 = getProfileCount(jitEntry1);
216     JitTraceCounter_t count2 = getProfileCount(jitEntry2);
217     return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
218 }
219 
220 /* Sort the trace profile counts and dump them */
221 void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
222 {
223     JitEntry *sortedEntries;
224     int numTraces = 0;
225     unsigned long counts = 0;
226     unsigned int i;
227 
228     /* Make sure that the table is not changing */
229     dvmLockMutex(&gDvmJit.tableLock);
230 
231     /* Sort the entries in descending order */
232     sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
233     if (sortedEntries == NULL)
234         goto done;
235     memcpy(sortedEntries, gDvmJit.pJitEntryTable,
236            sizeof(JitEntry) * gDvmJit.jitTableSize);
237     qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
238           sortTraceProfileCount);
239 
240     /* Dump the sorted entries */
241     for (i=0; i < gDvmJit.jitTableSize; i++) {
242         if (sortedEntries[i].dPC != 0) {
243             numTraces++;
244         }
245     }
246     if (numTraces == 0)
247         numTraces = 1;
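    /*
     * Note: counts is never accumulated above because getTraceBase() is a
     * stub that returns NULL on IA32, so the average reported below is
     * always 0.
     */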
248     ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));
249 
250     free(sortedEntries);
251 done:
252     dvmUnlockMutex(&gDvmJit.tableLock);
253     return;
254 }
255 
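/*
 * Emit an unconditional jump at instAddr to a target relOffset bytes away.
 * The displacement is reduced by the size of the emitted jmp instruction
 * itself, since x86 relative jumps are encoded relative to the address of
 * the following instruction.
 */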
256 void jumpWithRelOffset(char* instAddr, int relOffset) {
257     stream = instAddr;
258     OpndSize immSize = estOpndSizeFromImm(relOffset);
259     relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
260     dump_imm(Mnemonic_JMP, immSize, relOffset);
261 }
262 
263 // works whether or not instructions for the target basic block have been generated
264 LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
265     stream = instAddr;
266     bool unknown;
267     OpndSize size;
268     int relativeNCG = targetId;
269     relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
270     unconditional_jump_int(relativeNCG, size);
271     return NULL;
272 }
273 
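/*
 * Emit a conditional jump (condition cc) at instAddr to the basic block with
 * id targetId; getRelativeNCG resolves the target to a relative offset
 * whether or not native code for that block has been generated yet.
 */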
274 LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
275     stream = instAddr;
276     bool unknown;
277     OpndSize size;
278     int relativeNCG = targetId;
279     relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
280     conditional_jump_int(cc, relativeNCG, size);
281     return NULL;
282 }
283 
284 /*
285  * Attempt to enqueue a work order to patch an inline cache for a predicted
286  * chaining cell for virtual/interface calls.
287  */
288 static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
289                                     PredictedChainingCell *newContent)
290 {
291     bool result = true;
292 
293     /*
294      * Make sure only one thread gets here since updating the cell (ie the fast
295      * path) and queueing the request (ie the queued path) have to be done
296      * in an atomic fashion.
297      */
298     dvmLockMutex(&gDvmJit.compilerICPatchLock);
299 
300     /* Fast path for uninitialized chaining cell */
301     if (cellAddr->clazz == NULL &&
302         cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
303         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
304 
305         cellAddr->method = newContent->method;
306         cellAddr->branch = newContent->branch;
307         cellAddr->branch2 = newContent->branch2;
308 
309         /*
310          * The update order matters - make sure clazz is updated last since it
311          * will bring the uninitialized chaining cell to life.
312          */
313         android_atomic_release_store((int32_t)newContent->clazz,
314             (volatile int32_t *)(void*) &cellAddr->clazz);
315         //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
316         UPDATE_CODE_CACHE_PATCHES();
317 
318         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
319 
320 #if 0
321         MEM_BARRIER();
322         cellAddr->clazz = newContent->clazz;
323         //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
324 #endif
325 #if defined(IA_JIT_TUNING)
326         gDvmJit.icPatchInit++;
327 #endif
328         COMPILER_TRACE_CHAINING(
329             ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
330                   cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
331     /* Check if this is a frequently missed clazz */
332     } else if (cellAddr->stagedClazz != newContent->clazz) {
333         /* Not proven to be frequent yet - build up the filter cache */
334         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
335 
336         cellAddr->stagedClazz = newContent->clazz;
337 
338         UPDATE_CODE_CACHE_PATCHES();
339         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
340 
341 #if defined(WITH_JIT_TUNING)
342         gDvmJit.icPatchRejected++;
343 #endif
344     /*
345      * Different classes but same method implementation - it is safe to just
346      * patch the class value without the need to stop the world.
347      */
348     } else if (cellAddr->method == newContent->method) {
349         UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
350 
351         cellAddr->clazz = newContent->clazz;
352         /* No need to flush the cache here since the branch is not patched */
353         UPDATE_CODE_CACHE_PATCHES();
354 
355         PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));
356 
357 #if defined(WITH_JIT_TUNING)
358         gDvmJit.icPatchLockFree++;
359 #endif
360     /*
361      * Cannot patch the chaining cell inline - queue it until the next safe
362      * point.
363      */
364     } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE)  {
365         int index = gDvmJit.compilerICPatchIndex++;
366         const ClassObject *clazz = newContent->clazz;
367 
368         gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
369         gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
370         gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
371         gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
372         /* For verification purpose only */
373         gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;
374 
375 #if defined(WITH_JIT_TUNING)
376         gDvmJit.icPatchQueued++;
377 #endif
378         COMPILER_TRACE_CHAINING(
379             ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
380                   cellAddr, newContent->clazz->descriptor, newContent->method->name));
381     } else {
382     /* Queue is full - just drop this patch request */
383 #if defined(WITH_JIT_TUNING)
384         gDvmJit.icPatchDropped++;
385 #endif
386 
387         COMPILER_TRACE_CHAINING(
388             ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
389                   cellAddr, newContent->clazz->descriptor, newContent->method->name));
390     }
391 
392     dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
393     return result;
394 }
395 
396 /*
397  * This method is called from the invoke templates for virtual and interface
398  * methods to speculatively setup a chain to the callee. The templates are
399  * written in assembly and have setup method, cell, and clazz at r0, r2, and
400  * r3 respectively, so there is an unused argument in the list. Upon return one
401  * of the following three results may happen:
402  *   1) Chain is not setup because the callee is native. Reset the rechain
403  *      count to a big number so that it will take a long time before the next
404  *      rechain attempt happens.
405  *   2) Chain is not setup because the callee has not been created yet. Reset
406  *      the rechain count to a small number and retry in the near future.
407  *   3) Ask all other threads to stop before patching this chaining cell.
408  *      This is required because another thread may have passed the class check
409  *      but hasn't reached the chaining cell yet to follow the chain. If we
410  *      patch the content before halting the other thread, there could be a
411  *      small window in which it follows the new but wrong chain and
412  *      invokes a different method.
413  */
414 const Method *dvmJitToPatchPredictedChain(const Method *method,
415                                           Thread *self,
416                                           PredictedChainingCell *cell,
417                                           const ClassObject *clazz)
418 {
419     int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
420     /* Don't come back here for a long time if the method is native */
421     if (dvmIsNativeMethod(method)) {
422         UNPROTECT_CODE_CACHE(cell, sizeof(*cell));
423 
424         /*
425          * Put a non-zero/bogus value in the clazz field so that it won't
426          * trigger immediate patching and will continue to fail to match with
427          * a real clazz pointer.
428          */
429         cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;
430 
431         UPDATE_CODE_CACHE_PATCHES();
432         PROTECT_CODE_CACHE(cell, sizeof(*cell));
433         COMPILER_TRACE_CHAINING(
434             ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
435                   cell, method->name));
436         goto done;
437     }
438     {
439     int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);
440 
441     /*
442      * Compilation has not happened yet for the callee. Reset the counter to a small
443      * value and come back to check soon.
444      */
445     if ((tgtAddr == 0) ||
446         ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
447         COMPILER_TRACE_CHAINING(
448             ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
449                   cell, method->clazz->descriptor, method->name));
450         goto done;
451     }
452 
453     PredictedChainingCell newCell;
454 
455     if (cell->clazz == NULL) {
456         newRechainCount = self->icRechainCount;
457     }
458 
459     int relOffset = (int) tgtAddr - (int)cell;
460     OpndSize immSize = estOpndSizeFromImm(relOffset);
461     int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
462     relOffset -= jumpSize;
463     COMPILER_TRACE_CHAINING(
464             ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
465                   cell, method->clazz->descriptor, method->name, jumpSize));
466     //can't use stream here since it is used by the compilation thread
467     dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch
468 
469     newCell.clazz = clazz;
470     newCell.method = method;
471 
472     /*
473      * Enter the work order to the queue and the chaining cell will be patched
474      * the next time a safe point is entered.
475      *
476      * If the enqueuing fails reset the rechain count to a normal value so that
477      * it won't get indefinitely delayed.
478      */
479     inlineCachePatchEnqueue(cell, &newCell);
480     }
481 done:
482     self->icRechainCount = newRechainCount;
483     return method;
484 }
485 
486 /*
487  * Unchain a trace given the starting address of the translation
488  * in the code cache.  Refer to the diagram in dvmCompilerAssembleLIR.
489  * For ARM, it returns the address following the last cell unchained.
490  * For IA, it returns NULL since cacheflush is not required for IA.
491  */
492 u4* dvmJitUnchain(void* codeAddr)
493 {
494     /* codeAddr is 4-byte aligned, so is chain cell count offset */
495     u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
496     u2 chainCellCountOffset = *pChainCellCountOffset;
497     /* chain cell counts information is 4-byte aligned */
498     ChainCellCounts *pChainCellCounts =
499           (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
500     u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
501     u2 chainCellOffset = *pChainCellOffset;
502     u1* pChainCells;
503     int i,j;
504     PredictedChainingCell *predChainCell;
505     int padding;
506 
507     /* Locate the beginning of the chain cell region */
508     pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);
509 
510     /* The cells are sorted in order - walk through them and reset */
511     for (i = 0; i < kChainingCellGap; i++) {
512         /* for hot, normal, singleton chaining:
513                nop  //padding.
514                jmp 0
515                mov imm32, reg1
516                mov imm32, reg2
517                call reg2
518            after chaining:
519                nop
520                jmp imm
521                mov imm32, reg1
522                mov imm32, reg2
523                call reg2
524            after unchaining:
525                nop
526                jmp 0
527                mov imm32, reg1
528                mov imm32, reg2
529                call reg2
530            Space occupied by the chaining cell in bytes: the nop is for padding;
531                 for "jmp 0", the 32-bit target 0 is 4-byte aligned.
532            Space for predicted chaining: 5 words = 20 bytes
533         */
534         int elemSize = 0;
535         if (i == kChainingCellInvokePredicted) {
536             elemSize = 20;
537         }
538         COMPILER_TRACE_CHAINING(
539             ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));
540 
541         for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
542             switch(i) {
543                 case kChainingCellNormal:
544                 case kChainingCellHot:
545                 case kChainingCellInvokeSingleton:
546                 case kChainingCellBackwardBranch:
547                     COMPILER_TRACE_CHAINING(
548                         ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
549                     pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
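                    /*
                     * The element size appears to break down as: 4 bytes for
                     * the (4-byte aligned) jmp target that is reset to 0 by
                     * the memset below, 5 + 5 bytes for the two mov imm32
                     * instructions, and 2 bytes for the call through a
                     * register, per the layout comment above.
                     */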
550                     elemSize = 4+5+5+2;
551                     memset(pChainCells, 0, 4);
552                     break;
553                 case kChainingCellInvokePredicted:
554                     COMPILER_TRACE_CHAINING(
555                         ALOGI("Jit Runtime: unchaining of predicted"));
556                     /* 4-byte aligned */
557                     padding = (4 - ((u4)pChainCells & 3)) & 3;
558                     pChainCells += padding;
559                     predChainCell = (PredictedChainingCell *) pChainCells;
560                     /*
561                      * There could be a race where another mutator thread has
562                      * already passed the clazz comparison for this predicted cell
563                      * but has not yet followed the chain. So we cannot safely wipe the
564                      * method and branch but it is safe to clear the clazz,
565                      * which serves as the key.
566                      */
567                     predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
568                     break;
569                 default:
570                     ALOGE("Unexpected chaining type: %d", i);
571                     dvmAbort();  // dvmAbort OK here - can't safely recover
572             }
573             COMPILER_TRACE_CHAINING(
574                 ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
575             pChainCells += elemSize;  /* Advance by a fixed number of bytes */
576         }
577     }
578     return NULL;
579 }
580 
581 /* Unchain all translations in the cache. */
582 void dvmJitUnchainAll()
583 {
584     ALOGV("Jit Runtime: unchaining all");
585     if (gDvmJit.pJitEntryTable != NULL) {
586         COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
587         dvmLockMutex(&gDvmJit.tableLock);
588 
589         UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
590 
591         for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
592             if (gDvmJit.pJitEntryTable[i].dPC &&
593                 !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
594                 gDvmJit.pJitEntryTable[i].codeAddress) {
595                       dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
596             }
597         }
598 
599         PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
600 
601         dvmUnlockMutex(&gDvmJit.tableLock);
602         gDvmJit.translationChains = 0;
603     }
604     gDvmJit.hasNewChain = false;
605 }
606 
607 #define P_GPR_1 PhysicalReg_EBX
608 /* Add an additional jump instruction, keep jump target 4 bytes aligned.*/
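/*
 * A sketch of the intent behind the arithmetic below: with rem = stream % 4
 * and nop_size = 3 - rem, the nop padding leaves the 1-byte jmp opcode ending
 * on a 4-byte boundary, so the 32-bit jump target that follows is 4-byte
 * aligned and can later be rewritten during chaining.
 */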
609 static void insertJumpHelp()
610 {
611     int rem = (uint)stream % 4;
612     int nop_size = 3 - rem;
613     dump_nop(nop_size);
614     unconditional_jump_int(0, OpndSize_32);
615     return;
616 }
617 
618 /* Chaining cell for code that may need warmup. */
619 /* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
620                  blx r0
621                  data 0xb23a //bytecode address: 0x5115b23a
622                  data 0x5115
623    IA32 assembly:
624                   jmp  0 //5 bytes
625                   movl address, %ebx
626                   movl dvmJitToInterpNormal, %eax
627                   call %eax
628                   <-- return address
629 */
630 static void handleNormalChainingCell(CompilationUnit *cUnit,
631                                      unsigned int offset, int blockId, LowOpBlockLabel* labelList)
632 {
633     ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
634           cUnit->method->name, blockId, offset, stream - streamMethodStart);
635     if(dump_x86_inst)
636         ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
637               offset, stream - streamMethodStart, stream);
638     /* Add one additional "jump 0" instruction, which may be modified during jit chaining. This helps
639      * resolve the multithreading issue.
640      */
641     insertJumpHelp();
642     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
643     scratchRegs[0] = PhysicalReg_EAX;
644     call_dvmJitToInterpNormal();
645     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
646 }
647 
648 /*
649  * Chaining cell for instructions that immediately follow already translated
650  * code.
651  */
652 static void handleHotChainingCell(CompilationUnit *cUnit,
653                                   unsigned int offset, int blockId, LowOpBlockLabel* labelList)
654 {
655     ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
656           cUnit->method->name, blockId, offset, stream - streamMethodStart);
657     if(dump_x86_inst)
658         ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
659               offset, stream - streamMethodStart, stream);
660     /* Add one additional "jump 0" instruction, which may be modified during jit chaining. This helps
661      * resolve the multithreading issue.
662      */
663     insertJumpHelp();
664     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
665     scratchRegs[0] = PhysicalReg_EAX;
666     call_dvmJitToInterpTraceSelect();
667     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
668 }
669 
670 /* Chaining cell for branches that branch back into the same basic block */
671 static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
672                                      unsigned int offset, int blockId, LowOpBlockLabel* labelList)
673 {
674     ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
675           cUnit->method->name, blockId, offset, stream - streamMethodStart);
676     if(dump_x86_inst)
677         ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
678               offset, stream - streamMethodStart, stream);
679     /* Add one additional "jump 0" instruction, which may be modified during jit chaining. This helps
680      * resolve the multithreading issue.
681      */
682     insertJumpHelp();
683     move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
684     scratchRegs[0] = PhysicalReg_EAX;
685     call_dvmJitToInterpNormal();
686     //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
687 }
688 
689 /* Chaining cell for monomorphic method invocations. */
690 static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
691                                               const Method *callee, int blockId, LowOpBlockLabel* labelList)
692 {
693     ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
694           cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
695     if(dump_x86_inst)
696         ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
697               blockId, stream - streamMethodStart, stream);
698     /* Add one additional "jump 0" instruction, which may be modified during jit chaining. This helps
699      * resolve the multithreading issue.
700      */
701     insertJumpHelp();
702     move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
703     scratchRegs[0] = PhysicalReg_EAX;
704     call_dvmJitToInterpTraceSelect();
705     //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
706 }
707 #undef P_GPR_1
708 
709 /* Chaining cell for predicted (polymorphic) method invocations. */
710 static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
711 {
712     if(dump_x86_inst)
713         ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
714               blockId, stream - streamMethodStart, stream);
715 #ifndef PREDICTED_CHAINING
716     //assume rPC for callee->insns in %ebx
717     scratchRegs[0] = PhysicalReg_EAX;
718     call_dvmJitToInterpTraceSelectNoChain();
719 #else
720     /* make sure the section for the predicted chaining cell is 4-byte aligned */
721     //int padding = (4 - ((u4)stream & 3)) & 3;
722     //stream += padding;
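    /*
     * Five 32-bit words are written below, matching the 20-byte advance of
     * stream at the end: the branch word, a second word (presumably branch2),
     * the class, the method, and the rechain counter.
     */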
723     int* streamData = (int*)stream;
724     /* Should not be executed in the initial state */
725     streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
726     streamData[1] = 0;
727     /* To be filled: class */
728     streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
729     /* To be filled: method */
730     streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
731     /*
732      * Rechain count. The initial value of 0 here will trigger chaining upon
733      * the first invocation of this callsite.
734      */
735     streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
736 #if 0
737     ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
738           *((int*)(stream+8)), *((int*)(stream+12)));
739 #endif
740     stream += 20; //5 *4
741 #endif
742 }
743 
744 /* Load the Dalvik PC into r0 and jump to the specified target */
745 static void handlePCReconstruction(CompilationUnit *cUnit,
746                                    LowOpBlockLabel *targetLabel)
747 {
748 #if 0
749     LowOp **pcrLabel =
750         (LowOp **) cUnit->pcReconstructionList.elemList;
751     int numElems = cUnit->pcReconstructionList.numUsed;
752     int i;
753     for (i = 0; i < numElems; i++) {
754         dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
755         /* r0 = dalvik PC */
756         loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
757         genUnconditionalBranch(cUnit, targetLabel);
758     }
759 #endif
760 }
761 
762 //use O0 code generator for hoisted checks outside of the loop
763 /*
764  * vA = arrayReg;
765  * vB = idxReg;
766  * vC = endConditionReg;
767  * arg[0] = maxC
768  * arg[1] = minC
769  * arg[2] = loopBranchConditionCode
770  */
771 #define P_GPR_1 PhysicalReg_EBX
772 #define P_GPR_2 PhysicalReg_ECX
773 static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
774 {
775     /*
776      * NOTE: these synthesized blocks don't have ssa names assigned
777      * for Dalvik registers.  However, because they dominate the following
778      * blocks we can simply use the Dalvik name w/ subscript 0 as the
779      * ssa name.
780      */
781     DecodedInstruction *dInsn = &mir->dalvikInsn;
782     const int maxC = dInsn->arg[0];
783 
784     /* assign array in virtual register to P_GPR_1 */
785     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
786     /* assign index in virtual register to P_GPR_2 */
787     get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
788     export_pc();
789     compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
790     condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
791     int delta = maxC;
792     /*
793      * If the loop end condition is ">=" instead of ">", then the largest value
794      * of the index is "endCondition - 1".
795      */
796     if (dInsn->arg[2] == OP_IF_GE) {
797         delta--;
798     }
799 
800     if (delta < 0) { //+delta
801         //if P_GPR_2 is mapped to a VR, we can't do this
802         alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
803     } else if(delta > 0) {
804         alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
805     }
806     compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
807     condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
808 }
809 
810 /*
811  * vA = arrayReg;
812  * vB = idxReg;
813  * vC = endConditionReg;
814  * arg[0] = maxC
815  * arg[1] = minC
816  * arg[2] = loopBranchConditionCode
817  */
818 static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
819 {
820     DecodedInstruction *dInsn = &mir->dalvikInsn;
821     const int maxC = dInsn->arg[0];
822 
823     /* assign array in virtual register to P_GPR_1 */
824     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
825     /* assign index in virtual register to P_GPR_2 */
826     get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
827     export_pc();
828     compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
829     condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
830 
831     if (maxC < 0) {
832         //if P_GPR_2 is mapped to a VR, we can't do this
833         alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
834     } else if(maxC > 0) {
835         alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
836     }
837     compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
838     condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
839 
840 }
841 #undef P_GPR_1
842 #undef P_GPR_2
843 
844 /*
845  * vA = idxReg;
846  * vB = minC;
847  */
848 #define P_GPR_1 PhysicalReg_ECX
849 static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
850 {
851     DecodedInstruction *dInsn = &mir->dalvikInsn;
852     const int minC = dInsn->vB;
853     get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
854     export_pc();
855     compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
856     condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
857 }
858 #undef P_GPR_1
859 
860 #ifdef WITH_JIT_INLINING
861 static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
862 {
863     CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
864     if(gDvm.executionMode == kExecutionModeNcgO0) {
865         get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
866         move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
867         compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
868         export_pc(); //use %edx
869         conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
870         move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
871         compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
872     } else {
873         get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
874         move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
875         nullCheck(5, false, 1, mir->dalvikInsn.vC);
876         move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
877         compare_reg_reg(4, false, 6, false);
878     }
879 
880     //immediate will be updated later in genLandingPadForMispredictedCallee
881     streamMisPred = stream;
882     callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
883 }
884 #endif
885 
886 /* Extended MIR instructions like PHI */
887 void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
888 {
889     ExecutionMode origMode = gDvm.executionMode;
890     gDvm.executionMode = kExecutionModeNcgO0;
891     switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
892         case kMirOpPhi: {
893             break;
894         }
895         case kMirOpNullNRangeUpCheck: {
896             genHoistedChecksForCountUpLoop(cUnit, mir);
897             break;
898         }
899         case kMirOpNullNRangeDownCheck: {
900             genHoistedChecksForCountDownLoop(cUnit, mir);
901             break;
902         }
903         case kMirOpLowerBound: {
904             genHoistedLowerBoundCheck(cUnit, mir);
905             break;
906         }
907         case kMirOpPunt: {
908             break;
909         }
910 #ifdef WITH_JIT_INLINING
911         case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
912             genValidationForPredictedInline(cUnit, mir);
913             break;
914         }
915 #endif
916         default:
917             break;
918     }
919     gDvm.executionMode = origMode;
920 }
921 
922 static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
923                                 int bodyId)
924 {
925     /*
926      * Next, create two branches - one branch over to the loop body and the
927      * other branch to the PCR cell to punt.
928      */
929     //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
930     //setupResourceMasks(branchToBody);
931     //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);
932 
933 #if 0
934     LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
935     branchToPCR->opCode = kThumbBUncond;
936     branchToPCR->generic.target = (LIR *) pcrLabel;
937     setupResourceMasks(branchToPCR);
938     cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
939 #endif
940 }
941 
942 /* check whether we can merge a block ending in an unconditional goto with its taken target block */
943 bool mergeBlock(BasicBlock *bb) {
944     if(bb->blockType == kDalvikByteCode &&
945        bb->firstMIRInsn != NULL &&
946        (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
947         bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
948         bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
949        bb->fallThrough == NULL) {// &&
950        //cUnit->hasLoop) {
951         //ALOGI("merge blocks ending with goto at index %d", i);
952         MIR* prevInsn = bb->lastMIRInsn->prev;
953         if(bb->taken == NULL) return false;
954         MIR* mergeInsn = bb->taken->firstMIRInsn;
955         if(mergeInsn == NULL) return false;
956         if(prevInsn == NULL) {//the block has a single instruction
957             bb->firstMIRInsn = mergeInsn;
958         } else {
959             prevInsn->next = mergeInsn; //remove goto from the chain
960         }
961         mergeInsn->prev = prevInsn;
962         bb->lastMIRInsn = bb->taken->lastMIRInsn;
963         bb->taken->firstMIRInsn = NULL; //block being merged in
964         bb->fallThrough = bb->taken->fallThrough;
965         bb->taken = bb->taken->taken;
966         return true;
967     }
968     return false;
969 }
970 
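/*
 * Reserve space for the trace profiling entry point: record the header size
 * in the CompilationUnit and return the size of the profiling entry code
 * (stored by the caller as cUnit->profileCodeSize), which depends on the
 * current trace profiling mode.
 */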
971 static int genTraceProfileEntry(CompilationUnit *cUnit)
972 {
973     cUnit->headerSize = 6;
974     if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
975         (gDvmJit.profileMode == kTraceProfilingDisabled)) {
976         return 12;
977     } else {
978         return 4;
979     }
980 
981 }
982 
983 #define PRINT_BUFFER_LEN 1024
984 /* Print the code block in code cache in the range of [startAddr, endAddr)
985  * in readable format.
986  */
987 void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
988 {
989     char strbuf[PRINT_BUFFER_LEN];
990     unsigned char *addr;
991     unsigned char *next_addr;
992     int n;
993 
994     if (gDvmJit.printBinary) {
995         // print binary in bytes
996         n = 0;
997         for (addr = startAddr; addr < endAddr; addr++) {
998             n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
999             if (n > PRINT_BUFFER_LEN - 10) {
1000                 ALOGD("## %s", strbuf);
1001                 n = 0;
1002             }
1003         }
1004         if (n > 0)
1005             ALOGD("## %s", strbuf);
1006     }
1007 
1008     // print disassembled instructions
1009     addr = startAddr;
1010     while (addr < endAddr) {
1011         next_addr = reinterpret_cast<unsigned char*>
1012             (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
1013                                        strbuf, PRINT_BUFFER_LEN));
1014         if (addr != next_addr) {
1015             ALOGD("**  %p: %s", addr, strbuf);
1016         } else {                // check whether this is nop padding
1017             if (addr[0] == 0x90) {
1018                 ALOGD("**  %p: NOP (1 byte)", addr);
1019                 next_addr += 1;
1020             } else if (addr[0] == 0x66 && addr[1] == 0x90) {
1021                 ALOGD("**  %p: NOP (2 bytes)", addr);
1022                 next_addr += 2;
1023             } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
1024                 ALOGD("**  %p: NOP (3 bytes)", addr);
1025                 next_addr += 3;
1026             } else {
1027                 ALOGD("** unable to decode binary at %p", addr);
1028                 break;
1029             }
1030         }
1031         addr = next_addr;
1032     }
1033 }
1034 
1035 /* 4 is the number of additional bytes needed for chaining information for trace:
1036  * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */
1037 #define EXTRA_BYTES_FOR_CHAINING 4
1038 
1039 /* Entry function to invoke the backend of the JIT compiler */
1040 void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
1041 {
1042     dump_x86_inst = cUnit->printMe;
1043     /* Used to hold the labels of each block */
1044     LowOpBlockLabel *labelList =
1045         (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
1046     LowOp *headLIR = NULL;
1047     GrowableList chainingListByType[kChainingCellLast];
1048     unsigned int i, padding;
1049 
1050     /*
1051      * Initialize the chaining lists for the various cell types.
1052      */
1053     for (i = 0; i < kChainingCellLast; i++) {
1054         dvmInitGrowableList(&chainingListByType[i], 2);
1055     }
1056 
1057     /* Clear the visited flag for each block */
1058     dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
1059                                           kAllNodes, false /* isIterative */);
1060 
1061     GrowableListIterator iterator;
1062     dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);
1063 
1064     /* Traces start with a profiling entry point.  Generate it here */
1065     cUnit->profileCodeSize = genTraceProfileEntry(cUnit);
1066 
1067     //BasicBlock **blockList = cUnit->blockList;
1068     GrowableList *blockList = &cUnit->blockList;
1069     BasicBlock *bb;
1070 
1071     info->codeAddress = NULL;
1072     stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
1073 
1074     // TODO: compile into a temporary buffer and then copy into the code cache.
1075     // That would let us leave the code cache unprotected for a shorter time.
1076     size_t unprotected_code_cache_bytes =
1077             gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
1078     UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1079 
1080     streamStart = stream; /* trace start before alignment */
1081     stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
1082     stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
1083     streamMethodStart = stream; /* code start */
1084     for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
1085         labelList[i].lop.generic.offset = -1;
1086     }
1087     cUnit->exceptionBlockId = -1;
1088     for (i = 0; i < blockList->numUsed; i++) {
1089         bb = (BasicBlock *) blockList->elemList[i];
1090         if(bb->blockType == kExceptionHandling)
1091             cUnit->exceptionBlockId = i;
1092     }
1093     startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
1094     if(gDvm.executionMode == kExecutionModeNcgO1) {
1095         //merge blocks ending with "goto" with the fall through block
1096         if (cUnit->jitMode != kJitLoop)
1097             for (i = 0; i < blockList->numUsed; i++) {
1098                 bb = (BasicBlock *) blockList->elemList[i];
1099                 bool merged = mergeBlock(bb);
1100                 while(merged) merged = mergeBlock(bb);
1101             }
1102         for (i = 0; i < blockList->numUsed; i++) {
1103             bb = (BasicBlock *) blockList->elemList[i];
1104             if(bb->blockType == kDalvikByteCode &&
1105                bb->firstMIRInsn != NULL) {
1106                 preprocessingBB(bb);
1107             }
1108         }
1109         preprocessingTrace();
1110     }
1111 
1112     /* Handle the content in each basic block */
1113     for (i = 0; ; i++) {
1114         MIR *mir;
1115         bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
1116         if (bb == NULL) break;
1117         if (bb->visited == true) continue;
1118 
1119         labelList[i].immOpnd.value = bb->startOffset;
1120 
1121         if (bb->blockType >= kChainingCellLast) {
1122             /*
1123              * Append the label pseudo LIR first. Chaining cells will be handled
1124              * separately afterwards.
1125              */
1126             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
1127         }
1128 
1129         if (bb->blockType == kEntryBlock) {
1130             labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
1131             if (bb->firstMIRInsn == NULL) {
1132                 continue;
1133             } else {
1134               setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
1135                                   //&labelList[blockList[i]->fallThrough->id]);
1136             }
1137         } else if (bb->blockType == kExitBlock) {
1138             labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
1139             labelList[i].lop.generic.offset = (stream - streamMethodStart);
1140             goto gen_fallthrough;
1141         } else if (bb->blockType == kDalvikByteCode) {
1142             if (bb->hidden == true) continue;
1143             labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
1144             /* Reset the register state */
1145 #if 0
1146             resetRegisterScoreboard(cUnit);
1147 #endif
1148         } else {
1149             switch (bb->blockType) {
1150                 case kChainingCellNormal:
1151                     labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
1152                     /* handle the codegen later */
1153                     dvmInsertGrowableList(
1154                         &chainingListByType[kChainingCellNormal], i);
1155                     break;
1156                 case kChainingCellInvokeSingleton:
1157                     labelList[i].lop.opCode2 =
1158                         ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
1159                     labelList[i].immOpnd.value =
1160                         (int) bb->containingMethod;
1161                     /* handle the codegen later */
1162                     dvmInsertGrowableList(
1163                         &chainingListByType[kChainingCellInvokeSingleton], i);
1164                     break;
1165                 case kChainingCellInvokePredicted:
1166                     labelList[i].lop.opCode2 =
1167                         ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
1168                    /*
1169                      * Move the cached method pointer from operand 1 to 0.
1170                      * Operand 0 was clobbered earlier in this routine to store
1171                      * the block starting offset, which is not applicable to
1172                      * predicted chaining cell.
1173                      * a predicted chaining cell.
1174                     //TODO
1175                     //labelList[i].operands[0] = labelList[i].operands[1];
1176 
1177                     /* handle the codegen later */
1178                     dvmInsertGrowableList(
1179                         &chainingListByType[kChainingCellInvokePredicted], i);
1180                     break;
1181                 case kChainingCellHot:
1182                     labelList[i].lop.opCode2 =
1183                         ATOM_PSEUDO_CHAINING_CELL_HOT;
1184                     /* handle the codegen later */
1185                     dvmInsertGrowableList(
1186                         &chainingListByType[kChainingCellHot], i);
1187                     break;
1188                 case kPCReconstruction:
1189                     /* Make sure exception handling block is next */
1190                     labelList[i].lop.opCode2 =
1191                         ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
1192                     //assert (i == cUnit->numBlocks - 2);
1193                     labelList[i].lop.generic.offset = (stream - streamMethodStart);
1194                     handlePCReconstruction(cUnit,
1195                                            &labelList[cUnit->puntBlock->id]);
1196                     break;
1197                 case kExceptionHandling:
1198                     labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
1199                     labelList[i].lop.generic.offset = (stream - streamMethodStart);
1200                     //if (cUnit->pcReconstructionList.numUsed) {
1201                         scratchRegs[0] = PhysicalReg_EAX;
1202                         jumpToInterpPunt();
1203                         //call_dvmJitToInterpPunt();
1204                     //}
1205                     break;
1206                 case kChainingCellBackwardBranch:
1207                     labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
1208                     /* handle the codegen later */
1209                     dvmInsertGrowableList(
1210                         &chainingListByType[kChainingCellBackwardBranch],
1211                         i);
1212                     break;
1213                 default:
1214                     break;
1215             }
1216             continue;
1217         }
1218         {
1219         //LowOp *headLIR = NULL;
1220         const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
1221         const u2 *startCodePtr = dexCode->insns;
1222         const u2 *codePtr;
1223         labelList[i].lop.generic.offset = (stream - streamMethodStart);
1224         ALOGV("get ready to handle JIT bb %d type %d hidden %d",
1225               bb->id, bb->blockType, bb->hidden);
1226         for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
1227             bb = nextBB;
1228             bb->visited = true;
1229             cUnit->nextCodegenBlock = NULL;
1230 
1231         if(gDvm.executionMode == kExecutionModeNcgO1 &&
1232            bb->blockType != kEntryBlock &&
1233            bb->firstMIRInsn != NULL) {
1234             startOfBasicBlock(bb);
1235             int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
1236             endOfBasicBlock(bb);
1237             if(cg_ret < 0) {
1238                 endOfTrace(true/*freeOnly*/);
1239                 cUnit->baseAddr = NULL;
1240                 ALOGI("codeGenBasicBlockJit returns negative number");
1241                 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1242                 return;
1243             }
1244         } else {
1245         for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
1246             startOfBasicBlock(bb); //why here for O0
1247             Opcode dalvikOpCode = mir->dalvikInsn.opcode;
1248             if((int)dalvikOpCode >= (int)kMirOpFirst) {
1249                 handleExtendedMIR(cUnit, mir);
1250                 continue;
1251             }
1252             InstructionFormat dalvikFormat =
1253                 dexGetFormatFromOpcode(dalvikOpCode);
1254             ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
1255                   mir->offset, dalvikOpCode, dalvikFormat);
1256             LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
1257             /* Remember the first LIR for this block */
1258             if (headLIR == NULL) {
1259                 headLIR = (LowOp*)boundaryLIR;
1260             }
1261             bool notHandled = true;
1262             /*
1263              * Debugging: screen the opcode first to see if it is in the
1264              * do[-not]-compile list
1265              */
1266             bool singleStepMe =
1267                 gDvmJit.includeSelectedOp !=
1268                 ((gDvmJit.opList[dalvikOpCode >> 3] &
1269                   (1 << (dalvikOpCode & 0x7))) !=
1270                  0);
1271             if (singleStepMe || cUnit->allSingleStep) {
1272             } else {
1273                 codePtr = startCodePtr + mir->offset;
1274                 //lower each byte code, update LIR
1275                 notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
1276                 if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
1277                    CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1278                     ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
1279                     gDvmJit.codeCacheFull = true;
1280                     cUnit->baseAddr = NULL;
1281                     endOfTrace(true/*freeOnly*/);
1282                     PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1283                     return;
1284                 }
1285             }
1286             if (notHandled) {
1287                 ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
1288                      mir->offset,
1289                      dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
1290                      dalvikFormat);
1291                 dvmAbort();
1292                 break;
1293             }
1294         } // end for
1295         } // end else //JIT + O0 code generator
1296         }
1297         } // end for
1298         /* Eliminate redundant loads/stores and delay stores into later slots */
1299 #if 0
1300         dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
1301                                            cUnit->lastLIRInsn);
1302 #endif
1303         if (headLIR) headLIR = NULL;
1304 gen_fallthrough:
1305         /*
1306          * Check if the block is terminated due to trace length constraint -
1307          * insert an unconditional branch to the chaining cell.
1308          */
1309         if (bb->needFallThroughBranch) {
1310             jumpToBasicBlock(stream, bb->fallThrough->id);
1311         }
1312 
1313     }
1314 
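    /*
     * Remember where the chaining-cell region starts; its offset from the
     * start of the trace is recorded in the trace header below (pOffset[1]).
     */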
1315     char* streamChainingStart = (char*)stream;
1316     /* Handle the chaining cells in predefined order */
1317     for (i = 0; i < kChainingCellGap; i++) {
1318         size_t j;
1319         int *blockIdList = (int *) chainingListByType[i].elemList;
1320 
1321         cUnit->numChainingCells[i] = chainingListByType[i].numUsed;
1322 
1323         /* No chaining cells of this type */
1324         if (cUnit->numChainingCells[i] == 0)
1325             continue;
1326 
1327         /* Record the first LIR for a new type of chaining cell */
1328         cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
1329         for (j = 0; j < chainingListByType[i].numUsed; j++) {
1330             int blockId = blockIdList[j];
1331             BasicBlock *chainingBlock =
1332                 (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
1333                                                          blockId);
1334 
1335             labelList[blockId].lop.generic.offset = (stream - streamMethodStart);
1336 
1337             /* Align this chaining cell first */
1338 #if 0
1339             newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
1340 #endif
1341             /* Insert the pseudo chaining instruction */
1342             dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);
1343 
1344 
1345             switch (chainingBlock->blockType) {
1346                 case kChainingCellNormal:
1347                     handleNormalChainingCell(cUnit,
1348                      chainingBlock->startOffset, blockId, labelList);
1349                     break;
1350                 case kChainingCellInvokeSingleton:
1351                     handleInvokeSingletonChainingCell(cUnit,
1352                         chainingBlock->containingMethod, blockId, labelList);
1353                     break;
1354                 case kChainingCellInvokePredicted:
1355                     handleInvokePredictedChainingCell(cUnit, blockId);
1356                     break;
1357                 case kChainingCellHot:
1358                     handleHotChainingCell(cUnit,
1359                         chainingBlock->startOffset, blockId, labelList);
1360                     break;
1361                 case kChainingCellBackwardBranch:
1362                     handleBackwardBranchChainingCell(cUnit,
1363                         chainingBlock->startOffset, blockId, labelList);
1364                     break;
1365                 default:
1366                     ALOGE("Bad blocktype %d", chainingBlock->blockType);
1367                     dvmAbort();
1368                     break;
1369             }
1370 
1371             if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1372                 ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
1373                 gDvmJit.codeCacheFull = true;
1374                 cUnit->baseAddr = NULL;
1375                 endOfTrace(true); /* need to free structures */
1376                 PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1377                 return;
1378             }
1379         }
1380     }
1381 #if 0
1382     dvmCompilerApplyGlobalOptimizations(cUnit);
1383 #endif
1384     endOfTrace(false);
1385 
1386     if (gDvmJit.codeCacheFull) {
1387         /* We hit the code cache size limit inside endOfTrace(false).
1388          * Bail out of this trace!
1389          */
1390         ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
1391         cUnit->baseAddr = NULL;
1392         PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1393         return;
1394     }
1395 
1396     /* Emit the chaining cell counts section; make sure it is 4-byte aligned */
1397     padding = (4 - ((u4)stream & 3)) & 3;
1398     stream += padding;
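    /* e.g. if the low two bits of stream are 0x2, padding = (4 - 2) & 3 = 2,
     * pushing the counts section to the next 4-byte boundary; an already
     * aligned stream yields padding = 0. */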
1399     ChainCellCounts chainCellCounts;
1400     /* Install the chaining cell counts */
1401     for (i=0; i< kChainingCellGap; i++) {
1402         chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
1403     }
1404     char* streamCountStart = (char*)stream;
1405     memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
1406     stream += sizeof(chainCellCounts);
1407 
1408     cUnit->baseAddr = streamMethodStart;
1409     cUnit->totalSize = (stream - streamStart);
1410     if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
1411         ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
1412         gDvmJit.codeCacheFull = true;
1413         cUnit->baseAddr = NULL;
1414         PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1415         return;
1416     }
1417 
1418     /* write chaining cell count offset & chaining cell offset */
1419     u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
1420     *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
1421     pOffset[1] = streamChainingStart - streamMethodStart;
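    /*
     * Resulting trace layout, as assembled above:
     *   [two u2 offsets][basic-block code][chaining cells][padding][ChainCellCounts]
     * The two u2 words live in the EXTRA_BYTES_FOR_CHAINING area just before
     * codeAddr and hold the offsets of the ChainCellCounts section and of the
     * first chaining cell, both measured from codeAddr.
     */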
1422 
1423     PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
1424 
1425     gDvmJit.codeCacheByteUsed += (stream - streamStart);
1426     if (cUnit->printMe) {
1427         unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
1428         unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
1429         ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
1430               cUnit->method->clazz->descriptor, cUnit->method->name,
1431               codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
1432         ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
1433               cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
1434         printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
1435     }
1436     ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
1437           (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
1438           cUnit->totalSize, gDvmJit.codeCache);
1439 
1440     gDvmJit.numCompilations++;
1441 
1442     info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
1443 }
1444 
1445 /*
1446  * Perform translation chain operation.
1447  */
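/*
 * branchAddr points into a chaining cell whose jump still targets the
 * placeholder emitted at compile time; patching it with a direct jump to
 * tgtAddr lets execution flow straight into the target translation instead
 * of going back through the interpreter to look it up again.
 */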
1448 void* dvmJitChain(void* tgtAddr, u4* branchAddr)
1449 {
1450 #ifdef JIT_CHAIN
1451     int relOffset = (int) tgtAddr - (int)branchAddr;
1452 
1453     if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
1454         (gDvmJit.codeCacheFull == false)) {
1455 
1456         gDvmJit.translationChains++;
1457 
1458         //OpndSize immSize = estOpndSizeFromImm(relOffset);
1459         //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
1460         /* The jump operand size is hard-coded to 32 bits. This instruction replaces
1461          * the "jump 0" in the original code sequence.
1462          */
1463         OpndSize immSize = OpndSize_32;
1464         relOffset -= 5;
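        /* 5 is the length of an x86 JMP rel32 (opcode 0xE9 + 4-byte displacement);
         * the displacement is encoded relative to the instruction that follows
         * the JMP, hence the adjustment. */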
1465         //can't use stream here since it is used by the compilation thread
1466         UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1467         dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
1468         PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1469 
1470         gDvmJit.hasNewChain = true;
1471 
1472         COMPILER_TRACE_CHAINING(
1473             ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
1474                   (int) branchAddr, tgtAddr, relOffset));
1475     }
1476 #endif
1477     return tgtAddr;
1478 }
1479 
1480 /*
1481  * Accept the work and start compiling.  Returns true if compilation
1482  * is attempted.
1483  */
1484 bool dvmCompilerDoWork(CompilerWorkOrder *work)
1485 {
1486     JitTraceDescription *desc;
1487     bool isCompile;
1488     bool success = true;
1489 
1490     if (gDvmJit.codeCacheFull) {
1491         return false;
1492     }
1493 
1494     switch (work->kind) {
1495         case kWorkOrderTrace:
1496             isCompile = true;
1497             /* Start compilation with maximally allowed trace length */
1498             desc = (JitTraceDescription *)work->info;
1499             success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1500                                         work->bailPtr, 0 /* no hints */);
1501             break;
1502         case kWorkOrderTraceDebug: {
1503             bool oldPrintMe = gDvmJit.printMe;
1504             gDvmJit.printMe = true;
1505             isCompile = true;
1506             /* Start compilation with maximally allowed trace length */
1507             desc = (JitTraceDescription *)work->info;
1508             success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1509                                         work->bailPtr, 0 /* no hints */);
1510             gDvmJit.printMe = oldPrintMe;
1511             break;
1512         }
1513         case kWorkOrderProfileMode:
1514             dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
1515             isCompile = false;
1516             break;
1517         default:
1518             isCompile = false;
1519             ALOGE("Jit: unknown work order type");
1520             assert(0);  // Bail if debug build, discard otherwise
1521     }
1522     if (!success)
1523         work->result.codeAddress = NULL;
1524     return isCompile;
1525 }
1526 
1527 void dvmCompilerCacheFlush(long start, long end, long flags) {
1528   /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
1529 }
1530 
1531 //#endif
1532