/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Android's method call profiling goodies.
 */
#include "Dalvik.h"
#include <interp/InterpDefs.h>

#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <sys/mman.h>
#include <sched.h>
#include <errno.h>
#include <fcntl.h>

#include <cutils/open_memstream.h>

#ifdef HAVE_ANDROID_OS
# define UPDATE_MAGIC_PAGE      1
#endif
/*
 * File format:
 *  header
 *  record 0
 *  record 1
 *  ...
 *
 * Header format:
 *  u4  magic ('SLOW')
 *  u2  version
 *  u2  offset to data
 *  u8  start date/time in usec
 *  u2  record size in bytes (version >= 3 only)
 *  ... padding to 32 bytes
 *
 * Record format v1:
 *  u1  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *
 * Record format v2:
 *  u2  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *
 * Record format v3:
 *  u2  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *  u4  wall time since start, in usec (when clock == "dual" only)
 *
 * 32 bits of microseconds is 70 minutes.
 *
 * All values are stored in little-endian order.
 */
#define TRACE_REC_SIZE_SINGLE_CLOCK  10 // using v2
#define TRACE_REC_SIZE_DUAL_CLOCK    14 // using v3 with two timestamps
#define TRACE_MAGIC         0x574f4c53
#define TRACE_HEADER_LEN    32
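
/*
 * Size arithmetic behind the record-size constants above: a v2 record is
 * 2 (thread ID) + 4 (method ID | action) + 4 (time delta) = 10 bytes; a
 * v3 dual-clock record appends a 4-byte wall-clock delta for 14 bytes.
 * TRACE_MAGIC is 'SLOW' as it appears in the file: stored little-endian,
 * 0x574f4c53 becomes the bytes 0x53 0x4c 0x4f 0x57, i.e. "SLOW".
 */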

#define FILL_PATTERN        0xeeeeeeee


/*
 * Returns true if the thread CPU clock should be used.
 */
static inline bool useThreadCpuClock() {
#if defined(HAVE_POSIX_CLOCKS)
    return gDvm.profilerClockSource != kProfilerClockSourceWall;
#else
    return false;
#endif
}

/*
 * Returns true if the wall clock should be used.
 */
static inline bool useWallClock() {
#if defined(HAVE_POSIX_CLOCKS)
    return gDvm.profilerClockSource != kProfilerClockSourceThreadCpu;
#else
    return true;
#endif
}

/*
 * Get the wall-clock date/time, in usec.
 */
static inline u8 getWallTimeInUsec()
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return tv.tv_sec * 1000000LL + tv.tv_usec;
}

#if defined(HAVE_POSIX_CLOCKS)
/*
 * Get the thread-cpu time, in usec.
 * We use this clock when we can because it enables us to track the time that
 * a thread spends running and not blocked.
 */
static inline u8 getThreadCpuTimeInUsec(Thread* thread)
{
    clockid_t cid;
    struct timespec tm;
    pthread_getcpuclockid(thread->handle, &cid);
    clock_gettime(cid, &tm);
    if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
        ALOGE("bad nsec: %ld", tm.tv_nsec);
        dvmAbort();
    }
    return tm.tv_sec * 1000000LL + tm.tv_nsec / 1000;
}
#endif

/*
 * Get the clock used for stopwatch-like timing measurements on a single thread.
 */
static inline u8 getStopwatchClock()
{
#if defined(HAVE_POSIX_CLOCKS)
    return getThreadCpuTimeInUsec(dvmThreadSelf());
#else
    return getWallTimeInUsec();
#endif
}

/*
 * Write little-endian data.
 */
static inline void storeShortLE(u1* buf, u2 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
}
static inline void storeIntLE(u1* buf, u4 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
    *buf++ = (u1) (val >> 16);
    *buf++ = (u1) (val >> 24);
}
static inline void storeLongLE(u1* buf, u8 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
    *buf++ = (u1) (val >> 16);
    *buf++ = (u1) (val >> 24);
    *buf++ = (u1) (val >> 32);
    *buf++ = (u1) (val >> 40);
    *buf++ = (u1) (val >> 48);
    *buf++ = (u1) (val >> 56);
}
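
/*
 * For illustration: storeIntLE(buf, 0x12345678) leaves buf[0..3] as
 * 0x78 0x56 0x34 0x12, matching the little-endian layout the trace
 * header and record formats promise above.
 */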

/*
 * Gets a thread's stack trace as an array of method pointers of length pCount.
 * The returned array must be freed by the caller.
 */
static const Method** getStackTrace(Thread* thread, size_t* pCount)
{
    void* fp = thread->interpSave.curFrame;
    assert(thread == dvmThreadSelf() || dvmIsSuspended(thread));

    /* Compute the stack depth. */
    size_t stackDepth = 0;
    while (fp != NULL) {
        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);

        if (!dvmIsBreakFrame((u4*) fp))
            stackDepth++;

        assert(fp != saveArea->prevFrame);
        fp = saveArea->prevFrame;
    }
    *pCount = stackDepth;

    /*
     * Allocate memory for stack trace. This must be freed later, either by
     * freeThreadStackTraceSamples when tracing stops or by freeThread.
     */
    const Method** stackTrace = (const Method**) malloc(sizeof(Method*) *
                                                        stackDepth);
    if (stackTrace == NULL)
        return NULL;

    /* Walk the stack a second time, filling in the stack trace. */
    const Method** ptr = stackTrace;
    fp = thread->interpSave.curFrame;
    while (fp != NULL) {
        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
        const Method* method = saveArea->method;

        if (!dvmIsBreakFrame((u4*) fp)) {
            *ptr++ = method;
            stackDepth--;
        }
        assert(fp != saveArea->prevFrame);
        fp = saveArea->prevFrame;
    }
    assert(stackDepth == 0);

    return stackTrace;
}

/*
 * Get a sample of the stack trace for a thread.
 */
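/*
 * Worked example of the diff below: if the previous sample was [C, B, A]
 * (top of stack first) and the new sample is [E, D, B, A], the shared
 * suffix is [B, A], so we emit an exit for C, then entries for D and E.
 */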
static void getSample(Thread* thread)
{
    /* Get old and new stack trace for thread. */
    size_t newLength = 0;
    const Method** newStackTrace = getStackTrace(thread, &newLength);
    size_t oldLength = thread->stackTraceSampleLength;
    const Method** oldStackTrace = thread->stackTraceSample;

    /* Read time clocks to use for all events in this trace. */
    u4 cpuClockDiff = 0;
    u4 wallClockDiff = 0;
    dvmMethodTraceReadClocks(thread, &cpuClockDiff, &wallClockDiff);
    if (oldStackTrace == NULL) {
        /*
         * If there's no previous stack trace sample, log an entry event for
         * every method in the trace.
         */
        for (int i = newLength - 1; i >= 0; --i) {
            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
                              cpuClockDiff, wallClockDiff);
        }
    } else {
        /*
         * If there's a previous stack trace, diff the traces and emit entry
         * and exit events accordingly.
         */
        int diffIndexOld = oldLength - 1;
        int diffIndexNew = newLength - 1;
        /* Iterate bottom-up until there's a difference between traces. */
        while (diffIndexOld >= 0 && diffIndexNew >= 0 &&
               oldStackTrace[diffIndexOld] == newStackTrace[diffIndexNew]) {
            diffIndexOld--;
            diffIndexNew--;
        }
        /* Iterate top-down over old trace until diff, emitting exit events. */
        for (int i = 0; i <= diffIndexOld; ++i) {
            dvmMethodTraceAdd(thread, oldStackTrace[i], METHOD_TRACE_EXIT,
                              cpuClockDiff, wallClockDiff);
        }
        /* Iterate bottom-up over new trace from diff, emitting entry events. */
        for (int i = diffIndexNew; i >= 0; --i) {
            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
                              cpuClockDiff, wallClockDiff);
        }
    }

    /* Free the old stack trace and update the thread's stack trace sample. */
    free(oldStackTrace);
    thread->stackTraceSample = newStackTrace;
    thread->stackTraceSampleLength = newLength;
}

/*
 * Entry point for sampling thread. The sampling interval in microseconds is
 * passed in as an argument.
 */
static void* runSamplingThread(void* arg)
{
    int intervalUs = (int) arg;
    while (gDvm.methodTrace.traceEnabled) {
        dvmSuspendAllThreads(SUSPEND_FOR_SAMPLING);

        dvmLockThreadList(dvmThreadSelf());
        for (Thread *thread = gDvm.threadList; thread != NULL; thread = thread->next) {
            getSample(thread);
        }
        dvmUnlockThreadList();

        dvmResumeAllThreads(SUSPEND_FOR_SAMPLING);

        usleep(intervalUs);
    }
    return NULL;
}

/*
 * Boot-time init.
 */
bool dvmProfilingStartup()
{
    /*
     * Initialize "dmtrace" method profiling.
     */
    memset(&gDvm.methodTrace, 0, sizeof(gDvm.methodTrace));
    dvmInitMutex(&gDvm.methodTrace.startStopLock);
    pthread_cond_init(&gDvm.methodTrace.threadExitCond, NULL);

    assert(!dvmCheckException(dvmThreadSelf()));

    /*
     * Allocate storage for instruction counters.
     */
    gDvm.executedInstrCounts = (int*) calloc(kNumPackedOpcodes, sizeof(int));
    if (gDvm.executedInstrCounts == NULL)
        return false;

#ifdef UPDATE_MAGIC_PAGE
    /*
     * If we're running on the emulator, there's a magic page into which
     * we can put interpreted method information. This allows interpreted
     * methods to show up in the emulator's code traces.
     *
     * We could key this off of the "ro.kernel.qemu" property, but there's
     * no real harm in doing this on a real device.
     */
    int fd = open("/dev/qemu_trace", O_RDWR);
    if (fd < 0) {
        ALOGV("Unable to open /dev/qemu_trace");
    } else {
        gDvm.emulatorTracePage = mmap(0, SYSTEM_PAGE_SIZE, PROT_READ|PROT_WRITE,
                                      MAP_SHARED, fd, 0);
        close(fd);
        if (gDvm.emulatorTracePage == MAP_FAILED) {
            ALOGE("Unable to mmap /dev/qemu_trace");
            gDvm.emulatorTracePage = NULL;
        } else {
            *(u4*) gDvm.emulatorTracePage = 0;
        }
    }
#else
    assert(gDvm.emulatorTracePage == NULL);
#endif

    return true;
}

/*
 * Free up profiling resources.
 */
void dvmProfilingShutdown()
{
#ifdef UPDATE_MAGIC_PAGE
    if (gDvm.emulatorTracePage != NULL)
        munmap(gDvm.emulatorTracePage, SYSTEM_PAGE_SIZE);
#endif
    free(gDvm.executedInstrCounts);
}

/*
 * Update the set of active profilers
 */
static void updateActiveProfilers(ExecutionSubModes newMode, bool enable)
{
    int oldValue, newValue;

    // Update global count
    do {
        oldValue = gDvm.activeProfilers;
        newValue = oldValue + (enable ? 1 : -1);
        if (newValue < 0) {
            ALOGE("Can't have %d active profilers", newValue);
            dvmAbort();
        }
    } while (android_atomic_release_cas(oldValue, newValue,
                                        &gDvm.activeProfilers) != 0);

    // Tell the threads
    if (enable) {
        dvmEnableAllSubMode(newMode);
    } else {
        dvmDisableAllSubMode(newMode);
    }

#if defined(WITH_JIT)
    dvmCompilerUpdateGlobalState();
#endif

    ALOGD("+++ active profiler count now %d", newValue);
}


/*
 * Reset the "cpuClockBase" field in all threads.
 */
static void resetCpuClockBase()
{
    Thread* thread;

    dvmLockThreadList(NULL);
    for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
        thread->cpuClockBaseSet = false;
        thread->cpuClockBase = 0;
    }
    dvmUnlockThreadList();
}

/*
 * Free and reset the "stackTraceSample" field in all threads.
 */
static void freeThreadStackTraceSamples()
{
    Thread* thread;

    dvmLockThreadList(NULL);
    for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
        free(thread->stackTraceSample);
        thread->stackTraceSample = NULL;
    }
    dvmUnlockThreadList();
}

/*
 * Dump the thread list to the specified file.
 */
static void dumpThreadList(FILE* fp) {
    dvmLockThreadList(NULL);
    for (Thread* thread = gDvm.threadList; thread != NULL; thread = thread->next) {
        std::string threadName(dvmGetThreadName(thread));
        fprintf(fp, "%d\t%s\n", thread->threadId, threadName.c_str());
    }
    dvmUnlockThreadList();
}

/*
 * This is a dvmHashForeach callback.
 */
static int dumpMarkedMethods(void* vclazz, void* vfp)
{
    DexStringCache stringCache;
    ClassObject* clazz = (ClassObject*) vclazz;
    FILE* fp = (FILE*) vfp;
    Method* meth;
    char* name;
    int i;

    dexStringCacheInit(&stringCache);

    for (i = 0; i < clazz->virtualMethodCount; i++) {
        meth = &clazz->virtualMethods[i];
        if (meth->inProfile) {
            name = dvmDescriptorToName(meth->clazz->descriptor);
            fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
                    name, meth->name,
                    dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
                    dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
            meth->inProfile = false;
            free(name);
        }
    }

    for (i = 0; i < clazz->directMethodCount; i++) {
        meth = &clazz->directMethods[i];
        if (meth->inProfile) {
            name = dvmDescriptorToName(meth->clazz->descriptor);
            fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
                    name, meth->name,
                    dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
                    dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
            meth->inProfile = false;
            free(name);
        }
    }

    dexStringCacheRelease(&stringCache);

    return 0;
}

/*
 * Dump the list of "marked" methods to the specified file.
 */
static void dumpMethodList(FILE* fp)
{
    dvmHashTableLock(gDvm.loadedClasses);
    dvmHashForeach(gDvm.loadedClasses, dumpMarkedMethods, (void*) fp);
    dvmHashTableUnlock(gDvm.loadedClasses);
}

/*
 * Start method tracing. Method tracing is global to the VM (i.e. we
 * trace all threads).
 *
 * This opens the output file (if an already open fd has not been supplied,
 * and we're not going direct to DDMS) and allocates the data buffer. This
 * takes ownership of the file descriptor, closing it on completion.
 *
 * On failure, we throw an exception and return.
 */
void dvmMethodTraceStart(const char* traceFileName, int traceFd, int bufferSize,
                         int flags, bool directToDdms, bool samplingEnabled,
                         int intervalUs)
{
    MethodTraceState* state = &gDvm.methodTrace;

    assert(bufferSize > 0);

    dvmLockMutex(&state->startStopLock);
    while (state->traceEnabled != 0) {
        ALOGI("TRACE start requested, but already in progress; stopping");
        dvmUnlockMutex(&state->startStopLock);
        dvmMethodTraceStop();
        dvmLockMutex(&state->startStopLock);
    }
    ALOGI("TRACE STARTED: '%s' %dKB", traceFileName, bufferSize / 1024);

    /*
     * Allocate storage and open files.
     *
     * We don't need to initialize the buffer, but doing so might remove
     * some fault overhead if the pages aren't mapped until touched.
     */
    state->buf = (u1*) malloc(bufferSize);
    if (state->buf == NULL) {
        dvmThrowInternalError("buffer alloc failed");
        goto fail;
    }
    if (!directToDdms) {
        if (traceFd < 0) {
            state->traceFile = fopen(traceFileName, "w");
        } else {
            state->traceFile = fdopen(traceFd, "w");
        }
        if (state->traceFile == NULL) {
            int err = errno;
            ALOGE("Unable to open trace file '%s': %s",
                  traceFileName, strerror(err));
            dvmThrowExceptionFmt(gDvm.exRuntimeException,
                                 "Unable to open trace file '%s': %s",
                                 traceFileName, strerror(err));
            goto fail;
        }
    }
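    /*
     * Forget our copy of the descriptor: any fd adopted by the fdopen()
     * above now belongs to traceFile, and the "fail" path below must not
     * close it a second time.
     */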
    traceFd = -1;
    memset(state->buf, (char)FILL_PATTERN, bufferSize);

    state->directToDdms = directToDdms;
    state->bufferSize = bufferSize;
    state->overflow = false;

    /*
     * Enable alloc counts if we've been requested to do so.
     */
    state->flags = flags;
    if ((flags & TRACE_ALLOC_COUNTS) != 0)
        dvmStartAllocCounting();

    /* reset our notion of the start time for all CPU threads */
    resetCpuClockBase();

    state->startWhen = getWallTimeInUsec();

    if (useThreadCpuClock() && useWallClock()) {
        state->traceVersion = 3;
        state->recordSize = TRACE_REC_SIZE_DUAL_CLOCK;
    } else {
        state->traceVersion = 2;
        state->recordSize = TRACE_REC_SIZE_SINGLE_CLOCK;
    }

    state->samplingEnabled = samplingEnabled;

    /*
     * Output the header.
     */
    memset(state->buf, 0, TRACE_HEADER_LEN);
    storeIntLE(state->buf + 0, TRACE_MAGIC);
    storeShortLE(state->buf + 4, state->traceVersion);
    storeShortLE(state->buf + 6, TRACE_HEADER_LEN);
    storeLongLE(state->buf + 8, state->startWhen);
    if (state->traceVersion >= 3) {
        storeShortLE(state->buf + 16, state->recordSize);
    }
    state->curOffset = TRACE_HEADER_LEN;

    /*
     * Set the "enabled" flag. Once we do this, threads will wait to be
     * signaled before exiting, so we have to make sure we wake them up.
     */
    android_atomic_release_store(true, &state->traceEnabled);

    /*
     * ENHANCEMENT: To trace just a single thread, modify the
     * following to take a Thread* argument, and set the appropriate
     * interpBreak flags only on the target thread.
     */
    if (samplingEnabled) {
        updateActiveProfilers(kSubModeSampleTrace, true);
        /* Start the sampling thread. */
        if (!dvmCreateInternalThread(&state->samplingThreadHandle,
                "Sampling Thread", &runSamplingThread, (void*) intervalUs)) {
            dvmThrowInternalError("failed to create sampling thread");
            goto fail;
        }
    } else {
        updateActiveProfilers(kSubModeMethodTrace, true);
    }

    dvmUnlockMutex(&state->startStopLock);
    return;

fail:
    if (state->traceFile != NULL) {
        fclose(state->traceFile);
        state->traceFile = NULL;
    }
    if (state->buf != NULL) {
        free(state->buf);
        state->buf = NULL;
    }
    if (traceFd >= 0)
        close(traceFd);
    dvmUnlockMutex(&state->startStopLock);
}

/*
 * Run through the data buffer and pull out the methods that were visited.
 * Set a mark so that we know which ones to output.
 */
static void markTouchedMethods(int endOffset)
{
    u1* ptr = gDvm.methodTrace.buf + TRACE_HEADER_LEN;
    u1* end = gDvm.methodTrace.buf + endOffset;
    size_t recordSize = gDvm.methodTrace.recordSize;
    unsigned int methodVal;
    Method* method;

    while (ptr < end) {
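        /*
         * The method word sits at bytes 2..5 of each record because trace
         * versions 2 and 3 both lead with a two-byte thread ID.
         */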
        methodVal = ptr[2] | (ptr[3] << 8) | (ptr[4] << 16)
                    | (ptr[5] << 24);
        method = (Method*) METHOD_ID(methodVal);

        method->inProfile = true;
        ptr += recordSize;
    }
}

/*
 * Exercises the clocks in the same way they will be during profiling.
 */
static inline void measureClockOverhead()
{
#if defined(HAVE_POSIX_CLOCKS)
    if (useThreadCpuClock()) {
        getThreadCpuTimeInUsec(dvmThreadSelf());
    }
#endif
    if (useWallClock()) {
        getWallTimeInUsec();
    }
}

/*
 * Compute the amount of overhead in a clock call, in nsec.
 *
 * This value is going to vary depending on what else is going on in the
 * system. When examined across several runs a pattern should emerge.
 */
static u4 getClockOverhead()
{
    u8 calStart, calElapsed;
    int i;

    calStart = getStopwatchClock();
    for (i = 1000 * 4; i > 0; i--) {
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
    }

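    /*
     * 4000 iterations x 8 calls = 32,000 calls; elapsed microseconds
     * divided by 32 therefore gives nanoseconds per call.
     */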
    calElapsed = getStopwatchClock() - calStart;
    return (int) (calElapsed / (8*4));
}

/*
 * Indicates if method tracing is active and what kind of tracing is active.
 */
TracingMode dvmGetMethodTracingMode()
{
    const MethodTraceState* state = &gDvm.methodTrace;
    if (!state->traceEnabled) {
        return TRACING_INACTIVE;
    } else if (state->samplingEnabled) {
        return SAMPLE_PROFILING_ACTIVE;
    } else {
        return METHOD_TRACING_ACTIVE;
    }
}

/*
 * Stop method tracing. We write the buffer to disk and generate a key
 * file so we can interpret it.
 */
void dvmMethodTraceStop()
{
    MethodTraceState* state = &gDvm.methodTrace;
    bool samplingEnabled = state->samplingEnabled;
    u8 elapsed;

    /*
     * We need this to prevent somebody from starting a new trace while
     * we're in the process of stopping the old.
     */
    dvmLockMutex(&state->startStopLock);

    if (!state->traceEnabled) {
        /* somebody already stopped it, or it was never started */
        ALOGD("TRACE stop requested, but not running");
        dvmUnlockMutex(&state->startStopLock);
        return;
    } else {
        if (samplingEnabled) {
            updateActiveProfilers(kSubModeSampleTrace, false);
        } else {
            updateActiveProfilers(kSubModeMethodTrace, false);
        }
    }

    /* compute elapsed time */
    elapsed = getWallTimeInUsec() - state->startWhen;

    /*
     * Globally disable it, and allow other threads to notice. We want
     * to stall here for at least as long as dvmMethodTraceAdd needs
     * to finish. There's no real risk though -- it will take a while to
     * write the data to disk, and we don't clear the buffer pointer until
     * after that completes.
     */
    state->traceEnabled = false;
    ANDROID_MEMBAR_FULL();
    sched_yield();
    usleep(250 * 1000);

    if ((state->flags & TRACE_ALLOC_COUNTS) != 0)
        dvmStopAllocCounting();

    /*
     * It's possible under some circumstances for a thread to have advanced
     * the data pointer but not written the method value. It's possible
     * (though less likely) for the data pointer to be advanced, or partial
     * data written, while we're doing work here.
     *
     * To avoid seeing partially-written data, we grab state->curOffset here,
     * and use our local copy from here on. We then scan through what's
     * already written. If we see the fill pattern in what should be the
     * method pointer, we cut things off early. (If we don't, we'll fail
     * when we dereference the pointer.)
     *
     * There's a theoretical possibility of interrupting another thread
     * after it has partially written the method pointer, in which case
     * we'll likely crash when we dereference it. The possibility of
     * this actually happening should be at or near zero. Fixing it
     * completely could be done by writing the thread number last and
     * using a sentinel value to indicate a partially-written record,
     * but that requires memory barriers.
     */
    int finalCurOffset = state->curOffset;

    size_t recordSize = state->recordSize;
    if (finalCurOffset > TRACE_HEADER_LEN) {
        u4 fillVal = METHOD_ID(FILL_PATTERN);
        u1* scanPtr = state->buf + TRACE_HEADER_LEN;

        while (scanPtr < state->buf + finalCurOffset) {
            u4 methodVal = scanPtr[2] | (scanPtr[3] << 8) | (scanPtr[4] << 16)
                           | (scanPtr[5] << 24);
            if (METHOD_ID(methodVal) == fillVal) {
                u1* scanBase = state->buf + TRACE_HEADER_LEN;
                ALOGW("Found unfilled record at %d (of %d)",
                      (scanPtr - scanBase) / recordSize,
                      (finalCurOffset - TRACE_HEADER_LEN) / recordSize);
                finalCurOffset = scanPtr - state->buf;
                break;
            }

            scanPtr += recordSize;
        }
    }

    ALOGI("TRACE STOPPED%s: writing %d records",
          state->overflow ? " (NOTE: overflowed buffer)" : "",
          (finalCurOffset - TRACE_HEADER_LEN) / recordSize);
    if (gDvm.debuggerActive) {
        ALOGW("WARNING: a debugger is active; method-tracing results "
              "will be skewed");
    }

    /*
     * Do a quick calibration test to see how expensive our clock call is.
     */
    u4 clockNsec = getClockOverhead();

    markTouchedMethods(finalCurOffset);

    char* memStreamPtr;
    size_t memStreamSize;
    if (state->directToDdms) {
        assert(state->traceFile == NULL);
        state->traceFile = open_memstream(&memStreamPtr, &memStreamSize);
        if (state->traceFile == NULL) {
            /* not expected */
            ALOGE("Unable to open memstream");
            dvmAbort();
        }
    }
    assert(state->traceFile != NULL);

    fprintf(state->traceFile, "%cversion\n", TOKEN_CHAR);
    fprintf(state->traceFile, "%d\n", state->traceVersion);
    fprintf(state->traceFile, "data-file-overflow=%s\n",
            state->overflow ? "true" : "false");
    if (useThreadCpuClock()) {
        if (useWallClock()) {
            fprintf(state->traceFile, "clock=dual\n");
        } else {
            fprintf(state->traceFile, "clock=thread-cpu\n");
        }
    } else {
        fprintf(state->traceFile, "clock=wall\n");
    }
    fprintf(state->traceFile, "elapsed-time-usec=%llu\n", elapsed);
    fprintf(state->traceFile, "num-method-calls=%d\n",
            (finalCurOffset - TRACE_HEADER_LEN) / state->recordSize);
    fprintf(state->traceFile, "clock-call-overhead-nsec=%d\n", clockNsec);
    fprintf(state->traceFile, "vm=dalvik\n");
    if ((state->flags & TRACE_ALLOC_COUNTS) != 0) {
        fprintf(state->traceFile, "alloc-count=%d\n",
                gDvm.allocProf.allocCount);
        fprintf(state->traceFile, "alloc-size=%d\n",
                gDvm.allocProf.allocSize);
        fprintf(state->traceFile, "gc-count=%d\n",
                gDvm.allocProf.gcCount);
    }
    fprintf(state->traceFile, "%cthreads\n", TOKEN_CHAR);
    dumpThreadList(state->traceFile);
    fprintf(state->traceFile, "%cmethods\n", TOKEN_CHAR);
    dumpMethodList(state->traceFile);
    fprintf(state->traceFile, "%cend\n", TOKEN_CHAR);

    if (state->directToDdms) {
        /*
         * Data is in two places: memStreamPtr and state->buf. Send
         * the whole thing to DDMS, wrapped in an MPSE packet.
         */
        fflush(state->traceFile);

        struct iovec iov[2];
        iov[0].iov_base = memStreamPtr;
        iov[0].iov_len = memStreamSize;
        iov[1].iov_base = state->buf;
        iov[1].iov_len = finalCurOffset;
        dvmDbgDdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
    } else {
        /* append the profiling data */
        if (fwrite(state->buf, finalCurOffset, 1, state->traceFile) != 1) {
            int err = errno;
            ALOGE("trace fwrite(%d) failed: %s",
                  finalCurOffset, strerror(err));
            dvmThrowExceptionFmt(gDvm.exRuntimeException,
                                 "Trace data write failed: %s", strerror(err));
        }
    }

    /* done! */
    free(state->buf);
    state->buf = NULL;
    fclose(state->traceFile);
    state->traceFile = NULL;

    /* free and clear sampling traces held by all threads */
    if (samplingEnabled) {
        freeThreadStackTraceSamples();
    }

    /* wake any threads that were waiting for profiling to complete */
    dvmBroadcastCond(&state->threadExitCond);
    dvmUnlockMutex(&state->startStopLock);

    /* make sure the sampling thread has stopped */
    if (samplingEnabled &&
        pthread_join(state->samplingThreadHandle, NULL) != 0) {
        ALOGW("Sampling thread join failed");
    }
}

/*
 * Read clocks and generate time diffs for method trace events.
 */
void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
                              u4* wallClockDiff)
{
#if defined(HAVE_POSIX_CLOCKS)
    if (useThreadCpuClock()) {
        if (!self->cpuClockBaseSet) {
            /* Initialize per-thread CPU clock base time on first use. */
            self->cpuClockBase = getThreadCpuTimeInUsec(self);
            self->cpuClockBaseSet = true;
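            /* *cpuClockDiff keeps the caller's initial value (zero) for
             * this first event */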
        } else {
            *cpuClockDiff = getThreadCpuTimeInUsec(self) - self->cpuClockBase;
        }
    }
#endif
    if (useWallClock()) {
        *wallClockDiff = getWallTimeInUsec() - gDvm.methodTrace.startWhen;
    }
}

/*
 * We just did something with a method. Emit a record.
 *
 * Multiple threads may be banging on this all at once. We use atomic ops
 * rather than mutexes for speed.
 */
void dvmMethodTraceAdd(Thread* self, const Method* method, int action,
                       u4 cpuClockDiff, u4 wallClockDiff)
{
    MethodTraceState* state = &gDvm.methodTrace;
    u4 methodVal;
    int oldOffset, newOffset;
    u1* ptr;

    assert(method != NULL);

    /*
     * Advance "curOffset" atomically.
     */
    do {
        oldOffset = state->curOffset;
        newOffset = oldOffset + state->recordSize;
        if (newOffset > state->bufferSize) {
            state->overflow = true;
            return;
        }
    } while (android_atomic_release_cas(oldOffset, newOffset,
                                        &state->curOffset) != 0);

    //assert(METHOD_ACTION((u4) method) == 0);

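    /*
     * METHOD_COMBINE packs the action into the low bits of the method
     * pointer, and METHOD_ID masks them back out when the buffer is
     * scanned; the commented-out assert above reflects the assumption
     * that Method pointers are aligned so those bits start out zero.
     */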
    methodVal = METHOD_COMBINE((u4) method, action);

    /*
     * Write data into "oldOffset".
     */
    ptr = state->buf + oldOffset;
    *ptr++ = (u1) self->threadId;
    *ptr++ = (u1) (self->threadId >> 8);
    *ptr++ = (u1) methodVal;
    *ptr++ = (u1) (methodVal >> 8);
    *ptr++ = (u1) (methodVal >> 16);
    *ptr++ = (u1) (methodVal >> 24);

#if defined(HAVE_POSIX_CLOCKS)
    if (useThreadCpuClock()) {
        *ptr++ = (u1) cpuClockDiff;
        *ptr++ = (u1) (cpuClockDiff >> 8);
        *ptr++ = (u1) (cpuClockDiff >> 16);
        *ptr++ = (u1) (cpuClockDiff >> 24);
    }
#endif

    if (useWallClock()) {
        *ptr++ = (u1) wallClockDiff;
        *ptr++ = (u1) (wallClockDiff >> 8);
        *ptr++ = (u1) (wallClockDiff >> 16);
        *ptr++ = (u1) (wallClockDiff >> 24);
    }
}


/*
 * Register the METHOD_TRACE_ENTER action for the fast interpreter and
 * JIT'ed code.
 */
void dvmFastMethodTraceEnter(const Method* method, Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        u4 cpuClockDiff = 0;
        u4 wallClockDiff = 0;
        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER, cpuClockDiff,
                          wallClockDiff);
    }
}

/*
 * Register the METHOD_TRACE_EXIT action for the fast interpreter and
 * JIT'ed code for methods. The about-to-return callee method can be
 * retrieved from self->interpSave.method.
 */
void dvmFastMethodTraceExit(Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        u4 cpuClockDiff = 0;
        u4 wallClockDiff = 0;
        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
        dvmMethodTraceAdd(self, self->interpSave.method,
                          METHOD_TRACE_EXIT, cpuClockDiff, wallClockDiff);
    }
}

/*
 * Register the METHOD_TRACE_EXIT action for the fast interpreter and
 * JIT'ed code for JNI methods. The about-to-return JNI callee method is passed
 * in explicitly. Also used for inline-execute.
 */
void dvmFastNativeMethodTraceExit(const Method* method, Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        u4 cpuClockDiff = 0;
        u4 wallClockDiff = 0;
        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT, cpuClockDiff,
                          wallClockDiff);
    }
}

/*
 * We just did something with a method. Emit a record by setting a value
 * in a magic memory location.
 */
void dvmEmitEmulatorTrace(const Method* method, int action)
{
#ifdef UPDATE_MAGIC_PAGE
    /*
     * We store the address of the Dalvik bytecodes to the memory-mapped
     * trace page for normal methods. We also trace calls to native
     * functions by storing the address of the native function to the
     * trace page.
     * Abstract methods don't have any bytecodes, so we don't trace them.
     * (Abstract methods are never called, but in Dalvik they can be
     * because we do a "late trap" to a native method to generate the
     * abstract method exception.)
     */
    if (dvmIsAbstractMethod(method))
        return;

    u4* pMagic = (u4*) gDvm.emulatorTracePage;
    u4 addr;

    if (dvmIsNativeMethod(method)) {
        /*
         * The "action" parameter is one of:
         *   0 = ENTER
         *   1 = EXIT
         *   2 = UNROLL
         * To help the trace tools reconstruct the runtime stack containing
         * a mix of normal plus native methods, we add 4 to the action if this
         * is a native method.
         */
        action += 4;

        /*
         * Get the address of the native function.
         * This isn't the right address -- how do I get it?
         * Fortunately, the trace tools can get by without the address, but
         * it would be nice to fix this.
         */
        addr = (u4) method->nativeFunc;
    } else {
        /*
         * The dexlist output shows the &DexCode.insns offset value, which
         * is offset from the start of the base DEX header. Method.insns
         * is the absolute address, effectively offset from the start of
         * the optimized DEX header. We either need to return the
         * optimized DEX base file address offset by the right amount, or
         * take the "real" address and subtract off the size of the
         * optimized DEX header.
         *
         * Would be nice to factor this out at dexlist time, but we can't count
         * on having access to the correct optimized DEX file.
         */
        assert(method->insns != NULL);
        const DexOptHeader* pOptHdr = method->clazz->pDvmDex->pDexFile->pOptHeader;
        addr = (u4) method->insns - pOptHdr->dexOffset;
    }

    *(pMagic+action) = addr;
    LOGVV("Set %p = 0x%08x (%s.%s)",
          pMagic+action, addr, method->clazz->descriptor, method->name);
#endif
}

/*
 * The GC calls this when it's about to start. We add a marker to the
 * trace output so the tool can exclude the GC cost from the results.
 */
void dvmMethodTraceGCBegin()
{
    TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceGcMethod);
}
void dvmMethodTraceGCEnd()
{
    TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceGcMethod);
}

/*
 * The class loader calls this when it's loading or initializing a class.
 */
void dvmMethodTraceClassPrepBegin()
{
    TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod);
}
void dvmMethodTraceClassPrepEnd()
{
    TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod);
}


/*
 * Enable emulator trace info.
 */
void dvmEmulatorTraceStart()
{
    /* If we could not map the emulator trace page, then do not enable tracing */
    if (gDvm.emulatorTracePage == NULL)
        return;

    /* in theory we should make this an atomic inc; in practice not important */
    gDvm.emulatorTraceEnableCount++;
    if (gDvm.emulatorTraceEnableCount == 1)
        ALOGD("--- emulator method traces enabled");
    updateActiveProfilers(kSubModeEmulatorTrace, true);
}

/*
 * Disable emulator trace info.
 */
void dvmEmulatorTraceStop()
{
    if (gDvm.emulatorTraceEnableCount == 0) {
        ALOGE("ERROR: emulator tracing not enabled");
        return;
    }
    /* in theory we should make this an atomic dec; in practice not important */
    gDvm.emulatorTraceEnableCount--;
    if (gDvm.emulatorTraceEnableCount == 0)
        ALOGD("--- emulator method traces disabled");
    updateActiveProfilers(kSubModeEmulatorTrace,
                          (gDvm.emulatorTraceEnableCount != 0));
}


/*
 * Start instruction counting.
 */
void dvmStartInstructionCounting()
{
    /* in theory we should make this an atomic inc; in practice not important */
    gDvm.instructionCountEnableCount++;
    updateActiveProfilers(kSubModeInstCounting, true);
}

/*
 * Stop instruction counting.
 */
void dvmStopInstructionCounting()
{
    if (gDvm.instructionCountEnableCount == 0) {
        ALOGE("ERROR: instruction counting not enabled");
        dvmAbort();
    }
    gDvm.instructionCountEnableCount--;
    updateActiveProfilers(kSubModeInstCounting,
                          (gDvm.instructionCountEnableCount != 0));
}


/*
 * Start alloc counting. Note this doesn't affect the "active profilers"
 * count, since the interpreter loop is not involved.
 */
void dvmStartAllocCounting()
{
    gDvm.allocProf.enabled = true;
}

/*
 * Stop alloc counting.
 */
void dvmStopAllocCounting()
{
    gDvm.allocProf.enabled = false;
}