• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "trace.h"
18 
19 #include <sys/uio.h>
20 #include <unistd.h>
21 
22 #include "android-base/macros.h"
23 #include "android-base/stringprintf.h"
24 
25 #include "art_method-inl.h"
26 #include "base/casts.h"
27 #include "base/enums.h"
28 #include "base/os.h"
29 #include "base/stl_util.h"
30 #include "base/systrace.h"
31 #include "base/time_utils.h"
32 #include "base/unix_file/fd_file.h"
33 #include "base/utils.h"
34 #include "class_linker.h"
35 #include "common_throws.h"
36 #include "debugger.h"
37 #include "dex/descriptors_names.h"
38 #include "dex/dex_file-inl.h"
39 #include "entrypoints/quick/quick_entrypoints.h"
40 #include "gc/scoped_gc_critical_section.h"
41 #include "instrumentation.h"
42 #include "jit/jit.h"
43 #include "jit/jit_code_cache.h"
44 #include "mirror/class-inl.h"
45 #include "mirror/dex_cache-inl.h"
46 #include "mirror/object-inl.h"
47 #include "mirror/object_array-inl.h"
48 #include "nativehelper/scoped_local_ref.h"
49 #include "scoped_thread_state_change-inl.h"
50 #include "stack.h"
51 #include "thread.h"
52 #include "thread_list.h"
53 
54 namespace art {
55 
56 using android::base::StringPrintf;
57 
58 static constexpr size_t TraceActionBits = MinimumBitsToStore(
59     static_cast<size_t>(kTraceMethodActionMask));
60 static constexpr uint8_t kOpNewMethod = 1U;
61 static constexpr uint8_t kOpNewThread = 2U;
62 static constexpr uint8_t kOpTraceSummary = 3U;
63 
64 static const char     kTraceTokenChar             = '*';
65 static const uint16_t kTraceHeaderLength          = 32;
66 static const uint32_t kTraceMagicValue            = 0x574f4c53;
67 static const uint16_t kTraceVersionSingleClock    = 2;
68 static const uint16_t kTraceVersionDualClock      = 3;
69 static const uint16_t kTraceRecordSizeSingleClock = 10;  // using v2
70 static const uint16_t kTraceRecordSizeDualClock   = 14;  // using v3 with two timestamps
71 
72 TraceClockSource Trace::default_clock_source_ = kDefaultTraceClockSource;
73 
74 Trace* volatile Trace::the_trace_ = nullptr;
75 pthread_t Trace::sampling_pthread_ = 0U;
76 std::unique_ptr<std::vector<ArtMethod*>> Trace::temp_stack_trace_;
77 
78 // The key identifying the tracer to update instrumentation.
79 static constexpr const char* kTracerInstrumentationKey = "Tracer";
80 
DecodeTraceAction(uint32_t tmid)81 static TraceAction DecodeTraceAction(uint32_t tmid) {
82   return static_cast<TraceAction>(tmid & kTraceMethodActionMask);
83 }
84 
85 namespace {
86 // Scaling factor to convert timestamp counter into wall clock time reported in micro seconds.
87 // This is initialized at the start of tracing using the timestamp counter update frequency.
88 // See InitializeTimestampCounters for more details.
89 double tsc_to_microsec_scaling_factor = -1.0;
90 
GetTimestamp()91 uint64_t GetTimestamp() {
92   uint64_t t = 0;
93 #if defined(__arm__)
94   // On ARM 32 bit, we don't always have access to the timestamp counters from user space. There is
95   // no easy way to check if it is safe to read the timestamp counters. There is HWCAP_EVTSTRM which
96   // is set when generic timer is available but not necessarily from the user space. Kernel disables
97   // access to generic timer when there are known problems on the target CPUs. Sometimes access is
98   // disabled only for 32-bit processes even when 64-bit processes can accesses the timer from user
99   // space. These are not reflected in the HWCAP_EVTSTRM capability.So just fallback to
100   // clock_gettime on these processes. See b/289178149 for more discussion.
101   t = MicroTime();
102 #elif defined(__aarch64__)
103   // See Arm Architecture Registers  Armv8 section System Registers
104   asm volatile("mrs %0, cntvct_el0" : "=r"(t));
105 #elif defined(__i386__) || defined(__x86_64__)
106   // rdtsc returns two 32-bit values in rax and rdx even on 64-bit architectures.
107   unsigned int lo, hi;
108   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
109   t = (static_cast<uint64_t>(hi) << 32) | lo;
110 #else
111   t = MicroTime();
112 #endif
113   return t;
114 }
115 
116 #if defined(__i386__) || defined(__x86_64__)
117 // Here we compute the scaling factor by sleeping for a millisecond. Alternatively, we could
118 // generate raw timestamp counter and also time using clock_gettime at the start and the end of the
119 // trace. We can compute the frequency of timestamp counter upadtes in the post processing step
120 // using these two samples. However, that would require a change in Android Studio which is the main
121 // consumer of these profiles. For now, just compute the frequency of tsc updates here.
computeScalingFactor()122 double computeScalingFactor() {
123   uint64_t start = MicroTime();
124   uint64_t start_tsc = GetTimestamp();
125   // Sleep for one millisecond.
126   usleep(1000);
127   uint64_t diff_tsc = GetTimestamp() - start_tsc;
128   uint64_t diff_time = MicroTime() - start;
129   double scaling_factor = static_cast<double>(diff_time) / diff_tsc;
130   DCHECK(scaling_factor > 0.0) << scaling_factor;
131   return scaling_factor;
132 }
133 
GetScalingFactorForX86()134 double GetScalingFactorForX86() {
135   uint32_t eax, ebx, ecx;
136   asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx) : "a"(0x0), "c"(0));
137   if (eax < 0x15) {
138     // There is no 15H - Timestamp counter and core crystal clock information
139     // leaf. Just compute the frequency.
140     return computeScalingFactor();
141   }
142 
143   // From Intel architecture-instruction-set-extensions-programming-reference:
144   // EBX[31:0]/EAX[31:0] indicates the ratio of the TSC frequency and the
145   // core crystal clock frequency.
146   // If EBX[31:0] is 0, the TSC and "core crystal clock" ratio is not enumerated.
147   // If ECX is 0, the nominal core crystal clock frequency is not enumerated.
148   // "TSC frequency" = "core crystal clock frequency" * EBX/EAX.
149   // The core crystal clock may differ from the reference clock, bus clock, or core clock
150   // frequencies.
151   // EAX Bits 31 - 00: An unsigned integer which is the denominator of the
152   //                   TSC/"core crystal clock" ratio.
153   // EBX Bits 31 - 00: An unsigned integer which is the numerator of the
154   //                   TSC/"core crystal clock" ratio.
155   // ECX Bits 31 - 00: An unsigned integer which is the nominal frequency of the core
156   //                   crystal clock in Hz.
157   // EDX Bits 31 - 00: Reserved = 0.
158   asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx) : "a"(0x15), "c"(0));
159   if (ebx == 0 || ecx == 0) {
160     return computeScalingFactor();
161   }
162   double coreCrystalFreq = ecx;
163   // frequency = coreCrystalFreq * (ebx / eax)
164   // scaling_factor = seconds_to_microseconds / frequency
165   //                = seconds_to_microseconds * eax / (coreCrystalFreq * ebx)
166   double seconds_to_microseconds = 1000 * 1000;
167   double scaling_factor = (seconds_to_microseconds * eax) / (coreCrystalFreq * ebx);
168   return scaling_factor;
169 }
170 #endif
171 
InitializeTimestampCounters()172 void InitializeTimestampCounters() {
173   // It is sufficient to initialize this once for the entire execution. Just return if it is
174   // already initialized.
175   if (tsc_to_microsec_scaling_factor > 0.0) {
176     return;
177   }
178 
179 #if defined(__arm__)
180   // On ARM 32 bit, we don't always have access to the timestamp counters from
181   // user space. Seem comment in GetTimestamp for more details.
182   tsc_to_microsec_scaling_factor = 1.0;
183 #elif defined(__aarch64__)
184   double seconds_to_microseconds = 1000 * 1000;
185   uint64_t freq = 0;
186   // See Arm Architecture Registers  Armv8 section System Registers
187   asm volatile("mrs %0,  cntfrq_el0" : "=r"(freq));
188   tsc_to_microsec_scaling_factor = seconds_to_microseconds / static_cast<double>(freq);
189 #elif defined(__i386__) || defined(__x86_64__)
190   tsc_to_microsec_scaling_factor = GetScalingFactorForX86();
191 #else
192   tsc_to_microsec_scaling_factor = 1.0;
193 #endif
194 }
195 
GetMicroTime(uint64_t counter)196 ALWAYS_INLINE uint64_t GetMicroTime(uint64_t counter) {
197   DCHECK(tsc_to_microsec_scaling_factor > 0.0) << tsc_to_microsec_scaling_factor;
198   return tsc_to_microsec_scaling_factor * counter;
199 }
200 
201 }  // namespace
202 
DecodeTraceMethod(uint32_t tmid)203 ArtMethod* Trace::DecodeTraceMethod(uint32_t tmid) {
204   uint32_t method_index = tmid >> TraceActionBits;
205   // This is used only for logging which is usually needed only for debugging ART. So it's not
206   // performance critical.
207   for (auto const& entry : art_method_id_map_) {
208     if (method_index == entry.second) {
209       return entry.first;
210     }
211   }
212   return nullptr;
213 }
214 
EncodeTraceMethod(ArtMethod * method)215 uint32_t Trace::EncodeTraceMethod(ArtMethod* method) {
216   uint32_t idx = 0;
217   auto it = art_method_id_map_.find(method);
218   if (it != art_method_id_map_.end()) {
219     idx = it->second;
220   } else {
221     idx = current_method_index_;
222     art_method_id_map_.emplace(method, idx);
223     current_method_index_++;
224   }
225   return idx;
226 }
227 
AllocStackTrace()228 std::vector<ArtMethod*>* Trace::AllocStackTrace() {
229   return (temp_stack_trace_.get() != nullptr)  ? temp_stack_trace_.release() :
230       new std::vector<ArtMethod*>();
231 }
232 
FreeStackTrace(std::vector<ArtMethod * > * stack_trace)233 void Trace::FreeStackTrace(std::vector<ArtMethod*>* stack_trace) {
234   stack_trace->clear();
235   temp_stack_trace_.reset(stack_trace);
236 }
237 
SetDefaultClockSource(TraceClockSource clock_source)238 void Trace::SetDefaultClockSource(TraceClockSource clock_source) {
239 #if defined(__linux__)
240   default_clock_source_ = clock_source;
241 #else
242   if (clock_source != TraceClockSource::kWall) {
243     LOG(WARNING) << "Ignoring tracing request to use CPU time.";
244   }
245 #endif
246 }
247 
GetTraceVersion(TraceClockSource clock_source)248 static uint16_t GetTraceVersion(TraceClockSource clock_source) {
249   return (clock_source == TraceClockSource::kDual) ? kTraceVersionDualClock
250                                                     : kTraceVersionSingleClock;
251 }
252 
GetRecordSize(TraceClockSource clock_source)253 static uint16_t GetRecordSize(TraceClockSource clock_source) {
254   return (clock_source == TraceClockSource::kDual) ? kTraceRecordSizeDualClock
255                                                     : kTraceRecordSizeSingleClock;
256 }
257 
UseThreadCpuClock()258 bool Trace::UseThreadCpuClock() {
259   return (clock_source_ == TraceClockSource::kThreadCpu) ||
260       (clock_source_ == TraceClockSource::kDual);
261 }
262 
UseWallClock()263 bool Trace::UseWallClock() {
264   return (clock_source_ == TraceClockSource::kWall) ||
265       (clock_source_ == TraceClockSource::kDual);
266 }
267 
MeasureClockOverhead()268 void Trace::MeasureClockOverhead() {
269   if (UseThreadCpuClock()) {
270     Thread::Current()->GetCpuMicroTime();
271   }
272   if (UseWallClock()) {
273     GetTimestamp();
274   }
275 }
276 
277 // Compute an average time taken to measure clocks.
GetClockOverheadNanoSeconds()278 uint32_t Trace::GetClockOverheadNanoSeconds() {
279   Thread* self = Thread::Current();
280   uint64_t start = self->GetCpuMicroTime();
281 
282   for (int i = 4000; i > 0; i--) {
283     MeasureClockOverhead();
284     MeasureClockOverhead();
285     MeasureClockOverhead();
286     MeasureClockOverhead();
287     MeasureClockOverhead();
288     MeasureClockOverhead();
289     MeasureClockOverhead();
290     MeasureClockOverhead();
291   }
292 
293   uint64_t elapsed_us = self->GetCpuMicroTime() - start;
294   return static_cast<uint32_t>(elapsed_us / 32);
295 }
296 
297 // TODO: put this somewhere with the big-endian equivalent used by JDWP.
Append2LE(uint8_t * buf,uint16_t val)298 static void Append2LE(uint8_t* buf, uint16_t val) {
299   *buf++ = static_cast<uint8_t>(val);
300   *buf++ = static_cast<uint8_t>(val >> 8);
301 }
302 
303 // TODO: put this somewhere with the big-endian equivalent used by JDWP.
Append4LE(uint8_t * buf,uint32_t val)304 static void Append4LE(uint8_t* buf, uint32_t val) {
305   *buf++ = static_cast<uint8_t>(val);
306   *buf++ = static_cast<uint8_t>(val >> 8);
307   *buf++ = static_cast<uint8_t>(val >> 16);
308   *buf++ = static_cast<uint8_t>(val >> 24);
309 }
310 
311 // TODO: put this somewhere with the big-endian equivalent used by JDWP.
Append8LE(uint8_t * buf,uint64_t val)312 static void Append8LE(uint8_t* buf, uint64_t val) {
313   *buf++ = static_cast<uint8_t>(val);
314   *buf++ = static_cast<uint8_t>(val >> 8);
315   *buf++ = static_cast<uint8_t>(val >> 16);
316   *buf++ = static_cast<uint8_t>(val >> 24);
317   *buf++ = static_cast<uint8_t>(val >> 32);
318   *buf++ = static_cast<uint8_t>(val >> 40);
319   *buf++ = static_cast<uint8_t>(val >> 48);
320   *buf++ = static_cast<uint8_t>(val >> 56);
321 }
322 
GetSample(Thread * thread,void * arg)323 static void GetSample(Thread* thread, void* arg) REQUIRES_SHARED(Locks::mutator_lock_) {
324   std::vector<ArtMethod*>* const stack_trace = Trace::AllocStackTrace();
325   StackVisitor::WalkStack(
326       [&](const art::StackVisitor* stack_visitor) REQUIRES_SHARED(Locks::mutator_lock_) {
327         ArtMethod* m = stack_visitor->GetMethod();
328         // Ignore runtime frames (in particular callee save).
329         if (!m->IsRuntimeMethod()) {
330           stack_trace->push_back(m);
331         }
332         return true;
333       },
334       thread,
335       /* context= */ nullptr,
336       art::StackVisitor::StackWalkKind::kIncludeInlinedFrames);
337   Trace* the_trace = reinterpret_cast<Trace*>(arg);
338   the_trace->CompareAndUpdateStackTrace(thread, stack_trace);
339 }
340 
ClearThreadStackTraceAndClockBase(Thread * thread,void * arg ATTRIBUTE_UNUSED)341 static void ClearThreadStackTraceAndClockBase(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
342   thread->SetTraceClockBase(0);
343   std::vector<ArtMethod*>* stack_trace = thread->GetStackTraceSample();
344   thread->SetStackTraceSample(nullptr);
345   delete stack_trace;
346 }
347 
CompareAndUpdateStackTrace(Thread * thread,std::vector<ArtMethod * > * stack_trace)348 void Trace::CompareAndUpdateStackTrace(Thread* thread,
349                                        std::vector<ArtMethod*>* stack_trace) {
350   CHECK_EQ(pthread_self(), sampling_pthread_);
351   std::vector<ArtMethod*>* old_stack_trace = thread->GetStackTraceSample();
352   // Update the thread's stack trace sample.
353   thread->SetStackTraceSample(stack_trace);
354   // Read timer clocks to use for all events in this trace.
355   uint32_t thread_clock_diff = 0;
356   uint64_t timestamp_counter = 0;
357   ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
358   if (old_stack_trace == nullptr) {
359     // If there's no previous stack trace sample for this thread, log an entry event for all
360     // methods in the trace.
361     for (auto rit = stack_trace->rbegin(); rit != stack_trace->rend(); ++rit) {
362       LogMethodTraceEvent(thread, *rit, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
363     }
364   } else {
365     // If there's a previous stack trace for this thread, diff the traces and emit entry and exit
366     // events accordingly.
367     auto old_rit = old_stack_trace->rbegin();
368     auto rit = stack_trace->rbegin();
369     // Iterate bottom-up over both traces until there's a difference between them.
370     while (old_rit != old_stack_trace->rend() && rit != stack_trace->rend() && *old_rit == *rit) {
371       old_rit++;
372       rit++;
373     }
374     // Iterate top-down over the old trace until the point where they differ, emitting exit events.
375     for (auto old_it = old_stack_trace->begin(); old_it != old_rit.base(); ++old_it) {
376       LogMethodTraceEvent(thread, *old_it, kTraceMethodExit, thread_clock_diff, timestamp_counter);
377     }
378     // Iterate bottom-up over the new trace from the point where they differ, emitting entry events.
379     for (; rit != stack_trace->rend(); ++rit) {
380       LogMethodTraceEvent(thread, *rit, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
381     }
382     FreeStackTrace(old_stack_trace);
383   }
384 }
385 
RunSamplingThread(void * arg)386 void* Trace::RunSamplingThread(void* arg) {
387   Runtime* runtime = Runtime::Current();
388   intptr_t interval_us = reinterpret_cast<intptr_t>(arg);
389   CHECK_GE(interval_us, 0);
390   CHECK(runtime->AttachCurrentThread("Sampling Profiler", true, runtime->GetSystemThreadGroup(),
391                                      !runtime->IsAotCompiler()));
392 
393   while (true) {
394     usleep(interval_us);
395     ScopedTrace trace("Profile sampling");
396     Thread* self = Thread::Current();
397     Trace* the_trace;
398     {
399       MutexLock mu(self, *Locks::trace_lock_);
400       the_trace = the_trace_;
401       if (the_trace_->stop_tracing_) {
402         break;
403       }
404     }
405     {
406       // Avoid a deadlock between a thread doing garbage collection
407       // and the profile sampling thread, by blocking GC when sampling
408       // thread stacks (see b/73624630).
409       gc::ScopedGCCriticalSection gcs(self,
410                                       art::gc::kGcCauseInstrumentation,
411                                       art::gc::kCollectorTypeInstrumentation);
412       ScopedSuspendAll ssa(__FUNCTION__);
413       MutexLock mu(self, *Locks::thread_list_lock_);
414       runtime->GetThreadList()->ForEach(GetSample, the_trace);
415     }
416   }
417 
418   runtime->DetachCurrentThread();
419   return nullptr;
420 }
421 
Start(const char * trace_filename,size_t buffer_size,int flags,TraceOutputMode output_mode,TraceMode trace_mode,int interval_us)422 void Trace::Start(const char* trace_filename,
423                   size_t buffer_size,
424                   int flags,
425                   TraceOutputMode output_mode,
426                   TraceMode trace_mode,
427                   int interval_us) {
428   std::unique_ptr<File> file(OS::CreateEmptyFileWriteOnly(trace_filename));
429   if (file == nullptr) {
430     std::string msg = android::base::StringPrintf("Unable to open trace file '%s'", trace_filename);
431     PLOG(ERROR) << msg;
432     ScopedObjectAccess soa(Thread::Current());
433     Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;", msg.c_str());
434     return;
435   }
436   Start(std::move(file), buffer_size, flags, output_mode, trace_mode, interval_us);
437 }
438 
Start(int trace_fd,size_t buffer_size,int flags,TraceOutputMode output_mode,TraceMode trace_mode,int interval_us)439 void Trace::Start(int trace_fd,
440                   size_t buffer_size,
441                   int flags,
442                   TraceOutputMode output_mode,
443                   TraceMode trace_mode,
444                   int interval_us) {
445   if (trace_fd < 0) {
446     std::string msg = android::base::StringPrintf("Unable to start tracing with invalid fd %d",
447                                                   trace_fd);
448     LOG(ERROR) << msg;
449     ScopedObjectAccess soa(Thread::Current());
450     Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;", msg.c_str());
451     return;
452   }
453   std::unique_ptr<File> file(new File(trace_fd, /* path= */ "tracefile", /* check_usage= */ true));
454   Start(std::move(file), buffer_size, flags, output_mode, trace_mode, interval_us);
455 }
456 
StartDDMS(size_t buffer_size,int flags,TraceMode trace_mode,int interval_us)457 void Trace::StartDDMS(size_t buffer_size,
458                       int flags,
459                       TraceMode trace_mode,
460                       int interval_us) {
461   Start(std::unique_ptr<File>(),
462         buffer_size,
463         flags,
464         TraceOutputMode::kDDMS,
465         trace_mode,
466         interval_us);
467 }
468 
Start(std::unique_ptr<File> && trace_file_in,size_t buffer_size,int flags,TraceOutputMode output_mode,TraceMode trace_mode,int interval_us)469 void Trace::Start(std::unique_ptr<File>&& trace_file_in,
470                   size_t buffer_size,
471                   int flags,
472                   TraceOutputMode output_mode,
473                   TraceMode trace_mode,
474                   int interval_us) {
475   // We own trace_file now and are responsible for closing it. To account for error situations, use
476   // a specialized unique_ptr to ensure we close it on the way out (if it hasn't been passed to a
477   // Trace instance).
478   auto deleter = [](File* file) {
479     if (file != nullptr) {
480       file->MarkUnchecked();  // Don't deal with flushing requirements.
481       int result ATTRIBUTE_UNUSED = file->Close();
482       delete file;
483     }
484   };
485   std::unique_ptr<File, decltype(deleter)> trace_file(trace_file_in.release(), deleter);
486 
487   Thread* self = Thread::Current();
488   {
489     MutexLock mu(self, *Locks::trace_lock_);
490     if (the_trace_ != nullptr) {
491       LOG(ERROR) << "Trace already in progress, ignoring this request";
492       return;
493     }
494   }
495 
496   // Check interval if sampling is enabled
497   if (trace_mode == TraceMode::kSampling && interval_us <= 0) {
498     LOG(ERROR) << "Invalid sampling interval: " << interval_us;
499     ScopedObjectAccess soa(self);
500     ThrowRuntimeException("Invalid sampling interval: %d", interval_us);
501     return;
502   }
503 
504   // Initialize the frequency of timestamp counter updates here. This is needed
505   // to get wallclock time from timestamp counter values.
506   InitializeTimestampCounters();
507 
508   Runtime* runtime = Runtime::Current();
509 
510   // Enable count of allocs if specified in the flags.
511   bool enable_stats = false;
512 
513   // Create Trace object.
514   {
515     // Suspend JIT here since we are switching runtime to debuggable. Debuggable runtimes cannot use
516     // JITed code from before so we need to invalidated all JITed code here. Enter suspend JIT scope
517     // to prevent any races with ongoing JIT compilations.
518     jit::ScopedJitSuspend suspend_jit;
519     // Required since EnableMethodTracing calls ConfigureStubs which visits class linker classes.
520     gc::ScopedGCCriticalSection gcs(self,
521                                     gc::kGcCauseInstrumentation,
522                                     gc::kCollectorTypeInstrumentation);
523     ScopedSuspendAll ssa(__FUNCTION__);
524     MutexLock mu(self, *Locks::trace_lock_);
525     if (the_trace_ != nullptr) {
526       LOG(ERROR) << "Trace already in progress, ignoring this request";
527     } else {
528       enable_stats = (flags & kTraceCountAllocs) != 0;
529       the_trace_ = new Trace(trace_file.release(), buffer_size, flags, output_mode, trace_mode);
530       if (trace_mode == TraceMode::kSampling) {
531         CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread,
532                                             reinterpret_cast<void*>(interval_us)),
533                                             "Sampling profiler thread");
534         the_trace_->interval_us_ = interval_us;
535       } else {
536         if (!runtime->IsJavaDebuggable()) {
537           art::jit::Jit* jit = runtime->GetJit();
538           if (jit != nullptr) {
539             jit->GetCodeCache()->InvalidateAllCompiledCode();
540             jit->GetCodeCache()->TransitionToDebuggable();
541             jit->GetJitCompiler()->SetDebuggableCompilerOption(true);
542           }
543           runtime->SetRuntimeDebugState(art::Runtime::RuntimeDebugState::kJavaDebuggable);
544           runtime->GetInstrumentation()->UpdateEntrypointsForDebuggable();
545           runtime->DeoptimizeBootImage();
546         }
547         runtime->GetInstrumentation()->AddListener(
548             the_trace_,
549             instrumentation::Instrumentation::kMethodEntered |
550                 instrumentation::Instrumentation::kMethodExited |
551                 instrumentation::Instrumentation::kMethodUnwind);
552         // TODO: In full-PIC mode, we don't need to fully deopt.
553         // TODO: We can only use trampoline entrypoints if we are java-debuggable since in that case
554         // we know that inlining and other problematic optimizations are disabled. We might just
555         // want to use the trampolines anyway since it is faster. It makes the story with disabling
556         // jit-gc more complex though.
557         runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey,
558                                                            the_trace_,
559                                                            /*needs_interpreter=*/false);
560       }
561     }
562   }
563 
564   // Can't call this when holding the mutator lock.
565   if (enable_stats) {
566     runtime->SetStatsEnabled(true);
567   }
568 }
569 
UpdateThreadsList(Thread * thread)570 void Trace::UpdateThreadsList(Thread* thread) {
571   // TODO(mythria): Clean this up and update threads_list_ when recording the trace event similar
572   // to what we do for streaming case.
573   std::string name;
574   thread->GetThreadName(name);
575   // In tests, we destroy VM after already detaching the current thread. When a thread is
576   // detached we record the information about the threads_list_. We re-attach the current
577   // thread again as a "Shutdown thread" in the process of shutting down. So don't record
578   // information about shutdown threads.
579   if (name.compare("Shutdown thread") == 0) {
580     return;
581   }
582 
583   // There can be races when unregistering a thread and stopping the trace and it is possible to
584   // update the list twice. For example, This information is updated here when stopping tracing and
585   // also when a thread is detaching. In thread detach, we first update this information and then
586   // remove the thread from the list of active threads. If the tracing was stopped in between these
587   // events, we can see two updates for the same thread. Since we need a trace_lock_ it isn't easy
588   // to prevent this race (for ex: update this information when holding thread_list_lock_). It is
589   // harmless to do two updates so just use overwrite here.
590   threads_list_.Overwrite(thread->GetTid(), name);
591 }
592 
StopTracing(bool finish_tracing,bool flush_file)593 void Trace::StopTracing(bool finish_tracing, bool flush_file) {
594   Runtime* const runtime = Runtime::Current();
595   Thread* const self = Thread::Current();
596   pthread_t sampling_pthread = 0U;
597   {
598     MutexLock mu(self, *Locks::trace_lock_);
599     if (the_trace_ == nullptr) {
600       LOG(ERROR) << "Trace stop requested, but no trace currently running";
601       return;
602     }
603     // Tell sampling_pthread_ to stop tracing.
604     the_trace_->stop_tracing_ = true;
605     sampling_pthread = sampling_pthread_;
606   }
607 
608   // Make sure that we join before we delete the trace since we don't want to have
609   // the sampling thread access a stale pointer. This finishes since the sampling thread exits when
610   // the_trace_ is null.
611   if (sampling_pthread != 0U) {
612     CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
613   }
614 
615   // Make a copy of the_trace_, so it can be flushed later. We want to reset
616   // the_trace_ to nullptr in suspend all scope to prevent any races
617   Trace* the_trace = the_trace_;
618   bool stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
619   // Stop the trace sources adding more entries to the trace buffer and synchronise stores.
620   {
621     gc::ScopedGCCriticalSection gcs(
622         self, gc::kGcCauseInstrumentation, gc::kCollectorTypeInstrumentation);
623     jit::ScopedJitSuspend suspend_jit;
624     ScopedSuspendAll ssa(__FUNCTION__);
625 
626     if (the_trace->trace_mode_ == TraceMode::kSampling) {
627       MutexLock mu(self, *Locks::thread_list_lock_);
628       runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
629     } else {
630       runtime->GetInstrumentation()->RemoveListener(
631           the_trace,
632           instrumentation::Instrumentation::kMethodEntered |
633               instrumentation::Instrumentation::kMethodExited |
634               instrumentation::Instrumentation::kMethodUnwind);
635       runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
636       runtime->GetInstrumentation()->MaybeSwitchRuntimeDebugState(self);
637     }
638 
639     // Flush thread specific buffer from all threads before resetting the_trace_ to nullptr.
640     // We also flush the buffer when destroying a thread which expects the_trace_ to be valid so
641     // make sure that the per-thread buffer is reset before resetting the_trace_.
642     {
643       MutexLock tl_lock(Thread::Current(), *Locks::thread_list_lock_);
644       for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) {
645         if (thread->GetMethodTraceBuffer() != nullptr) {
646           the_trace_->FlushStreamingBuffer(thread);
647           thread->ResetMethodTraceBuffer();
648         }
649         // Record threads here before resetting the_trace_ to prevent any races between
650         // unregistering the thread and resetting the_trace_.
651         the_trace->UpdateThreadsList(thread);
652       }
653     }
654 
655     // Reset the_trace_ by taking a trace_lock
656     MutexLock mu(self, *Locks::trace_lock_);
657     the_trace_ = nullptr;
658     sampling_pthread_ = 0U;
659   }
660 
661   // At this point, code may read buf_ as its writers are shutdown
662   // and the ScopedSuspendAll above has ensured all stores to buf_
663   // are now visible.
664   if (finish_tracing) {
665     the_trace->FinishTracing();
666   }
667   if (the_trace->trace_file_.get() != nullptr) {
668     // Do not try to erase, so flush and close explicitly.
669     if (flush_file) {
670       if (the_trace->trace_file_->Flush() != 0) {
671         PLOG(WARNING) << "Could not flush trace file.";
672       }
673     } else {
674       the_trace->trace_file_->MarkUnchecked();  // Do not trigger guard.
675     }
676     if (the_trace->trace_file_->Close() != 0) {
677       PLOG(ERROR) << "Could not close trace file.";
678     }
679   }
680   delete the_trace;
681 
682   if (stop_alloc_counting) {
683     // Can be racy since SetStatsEnabled is not guarded by any locks.
684     runtime->SetStatsEnabled(false);
685   }
686 }
687 
FlushThreadBuffer(Thread * self)688 void Trace::FlushThreadBuffer(Thread* self) {
689   MutexLock mu(self, *Locks::trace_lock_);
690   the_trace_->FlushStreamingBuffer(self);
691 }
692 
Abort()693 void Trace::Abort() {
694   // Do not write anything anymore.
695   StopTracing(false, false);
696 }
697 
Stop()698 void Trace::Stop() {
699   // Finish writing.
700   StopTracing(true, true);
701 }
702 
Shutdown()703 void Trace::Shutdown() {
704   if (GetMethodTracingMode() != kTracingInactive) {
705     Stop();
706   }
707 }
708 
GetMethodTracingMode()709 TracingMode Trace::GetMethodTracingMode() {
710   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
711   if (the_trace_ == nullptr) {
712     return kTracingInactive;
713   } else {
714     switch (the_trace_->trace_mode_) {
715       case TraceMode::kSampling:
716         return kSampleProfilingActive;
717       case TraceMode::kMethodTracing:
718         return kMethodTracingActive;
719     }
720     LOG(FATAL) << "Unreachable";
721     UNREACHABLE();
722   }
723 }
724 
725 static constexpr size_t kMinBufSize = 18U;  // Trace header is up to 18B.
726 // Size of per-thread buffer size. The value is chosen arbitrarily. This value
727 // should be greater than kMinBufSize.
728 static constexpr size_t kPerThreadBufSize = 512 * 1024;
729 static_assert(kPerThreadBufSize > kMinBufSize);
730 
731 namespace {
732 
GetClockSourceFromFlags(int flags)733 TraceClockSource GetClockSourceFromFlags(int flags) {
734   bool need_wall = flags & Trace::TraceFlag::kTraceClockSourceWallClock;
735   bool need_thread_cpu = flags & Trace::TraceFlag::kTraceClockSourceThreadCpu;
736   if (need_wall && need_thread_cpu) {
737     return TraceClockSource::kDual;
738   } else if (need_wall) {
739     return TraceClockSource::kWall;
740   } else if (need_thread_cpu) {
741     return TraceClockSource::kThreadCpu;
742   } else {
743     return kDefaultTraceClockSource;
744   }
745 }
746 
747 }  // namespace
748 
Trace(File * trace_file,size_t buffer_size,int flags,TraceOutputMode output_mode,TraceMode trace_mode)749 Trace::Trace(File* trace_file,
750              size_t buffer_size,
751              int flags,
752              TraceOutputMode output_mode,
753              TraceMode trace_mode)
754     : trace_file_(trace_file),
755       buf_(new uint8_t[std::max(kMinBufSize, buffer_size)]()),
756       flags_(flags),
757       trace_output_mode_(output_mode),
758       trace_mode_(trace_mode),
759       clock_source_(GetClockSourceFromFlags(flags)),
760       buffer_size_(std::max(kMinBufSize, buffer_size)),
761       start_time_(GetMicroTime(GetTimestamp())),
762       clock_overhead_ns_(GetClockOverheadNanoSeconds()),
763       overflow_(false),
764       interval_us_(0),
765       stop_tracing_(false),
766       tracing_lock_("tracing lock", LockLevel::kTracingStreamingLock) {
767   CHECK_IMPLIES(trace_file == nullptr, output_mode == TraceOutputMode::kDDMS);
768 
769   uint16_t trace_version = GetTraceVersion(clock_source_);
770   if (output_mode == TraceOutputMode::kStreaming) {
771     trace_version |= 0xF0U;
772   }
773   // Set up the beginning of the trace.
774   memset(buf_.get(), 0, kTraceHeaderLength);
775   Append4LE(buf_.get(), kTraceMagicValue);
776   Append2LE(buf_.get() + 4, trace_version);
777   Append2LE(buf_.get() + 6, kTraceHeaderLength);
778   Append8LE(buf_.get() + 8, start_time_);
779   if (trace_version >= kTraceVersionDualClock) {
780     uint16_t record_size = GetRecordSize(clock_source_);
781     Append2LE(buf_.get() + 16, record_size);
782   }
783   static_assert(18 <= kMinBufSize, "Minimum buffer size not large enough for trace header");
784 
785   cur_offset_.store(kTraceHeaderLength, std::memory_order_relaxed);
786 
787   if (output_mode == TraceOutputMode::kStreaming) {
788     // Flush the header information to the file. We use a per thread buffer, so
789     // it is easier to just write the header information directly to file.
790     if (!trace_file_->WriteFully(buf_.get(), kTraceHeaderLength)) {
791       PLOG(WARNING) << "Failed streaming a tracing event.";
792     }
793     cur_offset_.store(0, std::memory_order_relaxed);
794   }
795 }
796 
ReadBytes(uint8_t * buf,size_t bytes)797 static uint64_t ReadBytes(uint8_t* buf, size_t bytes) {
798   uint64_t ret = 0;
799   for (size_t i = 0; i < bytes; ++i) {
800     ret |= static_cast<uint64_t>(buf[i]) << (i * 8);
801   }
802   return ret;
803 }
804 
DumpBuf(uint8_t * buf,size_t buf_size,TraceClockSource clock_source)805 void Trace::DumpBuf(uint8_t* buf, size_t buf_size, TraceClockSource clock_source) {
806   uint8_t* ptr = buf + kTraceHeaderLength;
807   uint8_t* end = buf + buf_size;
808 
809   MutexLock mu(Thread::Current(), tracing_lock_);
810   while (ptr < end) {
811     uint32_t tmid = ReadBytes(ptr + 2, sizeof(tmid));
812     ArtMethod* method = DecodeTraceMethod(tmid);
813     TraceAction action = DecodeTraceAction(tmid);
814     LOG(INFO) << ArtMethod::PrettyMethod(method) << " " << static_cast<int>(action);
815     ptr += GetRecordSize(clock_source);
816   }
817 }
818 
FinishTracing()819 void Trace::FinishTracing() {
820   size_t final_offset = 0;
821   if (trace_output_mode_ != TraceOutputMode::kStreaming) {
822     final_offset = cur_offset_.load(std::memory_order_relaxed);
823   }
824 
825   // Compute elapsed time.
826   uint64_t elapsed = GetMicroTime(GetTimestamp()) - start_time_;
827 
828   std::ostringstream os;
829 
830   os << StringPrintf("%cversion\n", kTraceTokenChar);
831   os << StringPrintf("%d\n", GetTraceVersion(clock_source_));
832   os << StringPrintf("data-file-overflow=%s\n", overflow_ ? "true" : "false");
833   if (UseThreadCpuClock()) {
834     if (UseWallClock()) {
835       os << StringPrintf("clock=dual\n");
836     } else {
837       os << StringPrintf("clock=thread-cpu\n");
838     }
839   } else {
840     os << StringPrintf("clock=wall\n");
841   }
842   os << StringPrintf("elapsed-time-usec=%" PRIu64 "\n", elapsed);
843   if (trace_output_mode_ != TraceOutputMode::kStreaming) {
844     size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
845     os << StringPrintf("num-method-calls=%zd\n", num_records);
846   }
847   os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead_ns_);
848   os << StringPrintf("vm=art\n");
849   os << StringPrintf("pid=%d\n", getpid());
850   if ((flags_ & kTraceCountAllocs) != 0) {
851     os << "alloc-count=" << Runtime::Current()->GetStat(KIND_ALLOCATED_OBJECTS) << "\n";
852     os << "alloc-size=" << Runtime::Current()->GetStat(KIND_ALLOCATED_BYTES) << "\n";
853     os << "gc-count=" <<  Runtime::Current()->GetStat(KIND_GC_INVOCATIONS) << "\n";
854   }
855   os << StringPrintf("%cthreads\n", kTraceTokenChar);
856   DumpThreadList(os);
857   os << StringPrintf("%cmethods\n", kTraceTokenChar);
858   DumpMethodList(os);
859   os << StringPrintf("%cend\n", kTraceTokenChar);
860   std::string header(os.str());
861 
862   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
863     // It is expected that this method is called when all other threads are suspended, so there
864     // cannot be any writes to trace_file_ after finish tracing.
865     // Write a special token to mark the end of trace records and the start of
866     // trace summary.
867     uint8_t buf[7];
868     Append2LE(buf, 0);
869     buf[2] = kOpTraceSummary;
870     Append4LE(buf + 3, static_cast<uint32_t>(header.length()));
871     // Write the trace summary. The summary is identical to the file header when
872     // the output mode is not streaming (except for methods).
873     if (!trace_file_->WriteFully(buf, sizeof(buf)) ||
874         !trace_file_->WriteFully(header.c_str(), header.length())) {
875       PLOG(WARNING) << "Failed streaming a tracing event.";
876     }
877   } else {
878     if (trace_file_.get() == nullptr) {
879       std::vector<uint8_t> data;
880       data.resize(header.length() + final_offset);
881       memcpy(data.data(), header.c_str(), header.length());
882       memcpy(data.data() + header.length(), buf_.get(), final_offset);
883       Runtime::Current()->GetRuntimeCallbacks()->DdmPublishChunk(CHUNK_TYPE("MPSE"),
884                                                                  ArrayRef<const uint8_t>(data));
885       const bool kDumpTraceInfo = false;
886       if (kDumpTraceInfo) {
887         LOG(INFO) << "Trace sent:\n" << header;
888         DumpBuf(buf_.get(), final_offset, clock_source_);
889       }
890     } else {
891       if (!trace_file_->WriteFully(header.c_str(), header.length()) ||
892           !trace_file_->WriteFully(buf_.get(), final_offset)) {
893         std::string detail(StringPrintf("Trace data write failed: %s", strerror(errno)));
894         PLOG(ERROR) << detail;
895         ThrowRuntimeException("%s", detail.c_str());
896       }
897     }
898   }
899 }
900 
DexPcMoved(Thread * thread ATTRIBUTE_UNUSED,Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,ArtMethod * method,uint32_t new_dex_pc)901 void Trace::DexPcMoved(Thread* thread ATTRIBUTE_UNUSED,
902                        Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
903                        ArtMethod* method,
904                        uint32_t new_dex_pc) {
905   // We're not recorded to listen to this kind of event, so complain.
906   LOG(ERROR) << "Unexpected dex PC event in tracing " << ArtMethod::PrettyMethod(method)
907              << " " << new_dex_pc;
908 }
909 
FieldRead(Thread * thread ATTRIBUTE_UNUSED,Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,ArtMethod * method,uint32_t dex_pc,ArtField * field ATTRIBUTE_UNUSED)910 void Trace::FieldRead(Thread* thread ATTRIBUTE_UNUSED,
911                       Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
912                       ArtMethod* method,
913                       uint32_t dex_pc,
914                       ArtField* field ATTRIBUTE_UNUSED)
915     REQUIRES_SHARED(Locks::mutator_lock_) {
916   // We're not recorded to listen to this kind of event, so complain.
917   LOG(ERROR) << "Unexpected field read event in tracing " << ArtMethod::PrettyMethod(method)
918              << " " << dex_pc;
919 }
920 
FieldWritten(Thread * thread ATTRIBUTE_UNUSED,Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,ArtMethod * method,uint32_t dex_pc,ArtField * field ATTRIBUTE_UNUSED,const JValue & field_value ATTRIBUTE_UNUSED)921 void Trace::FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
922                          Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
923                          ArtMethod* method,
924                          uint32_t dex_pc,
925                          ArtField* field ATTRIBUTE_UNUSED,
926                          const JValue& field_value ATTRIBUTE_UNUSED)
927     REQUIRES_SHARED(Locks::mutator_lock_) {
928   // We're not recorded to listen to this kind of event, so complain.
929   LOG(ERROR) << "Unexpected field write event in tracing " << ArtMethod::PrettyMethod(method)
930              << " " << dex_pc;
931 }
932 
MethodEntered(Thread * thread,ArtMethod * method)933 void Trace::MethodEntered(Thread* thread, ArtMethod* method) {
934   uint32_t thread_clock_diff = 0;
935   uint64_t timestamp_counter = 0;
936   ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
937   LogMethodTraceEvent(thread, method, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
938 }
939 
MethodExited(Thread * thread,ArtMethod * method,instrumentation::OptionalFrame frame ATTRIBUTE_UNUSED,JValue & return_value ATTRIBUTE_UNUSED)940 void Trace::MethodExited(Thread* thread,
941                          ArtMethod* method,
942                          instrumentation::OptionalFrame frame ATTRIBUTE_UNUSED,
943                          JValue& return_value ATTRIBUTE_UNUSED) {
944   uint32_t thread_clock_diff = 0;
945   uint64_t timestamp_counter = 0;
946   ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
947   LogMethodTraceEvent(thread, method, kTraceMethodExit, thread_clock_diff, timestamp_counter);
948 }
949 
MethodUnwind(Thread * thread,ArtMethod * method,uint32_t dex_pc ATTRIBUTE_UNUSED)950 void Trace::MethodUnwind(Thread* thread,
951                          ArtMethod* method,
952                          uint32_t dex_pc ATTRIBUTE_UNUSED) {
953   uint32_t thread_clock_diff = 0;
954   uint64_t timestamp_counter = 0;
955   ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
956   LogMethodTraceEvent(thread, method, kTraceUnroll, thread_clock_diff, timestamp_counter);
957 }
958 
ExceptionThrown(Thread * thread ATTRIBUTE_UNUSED,Handle<mirror::Throwable> exception_object ATTRIBUTE_UNUSED)959 void Trace::ExceptionThrown(Thread* thread ATTRIBUTE_UNUSED,
960                             Handle<mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
961     REQUIRES_SHARED(Locks::mutator_lock_) {
962   LOG(ERROR) << "Unexpected exception thrown event in tracing";
963 }
964 
ExceptionHandled(Thread * thread ATTRIBUTE_UNUSED,Handle<mirror::Throwable> exception_object ATTRIBUTE_UNUSED)965 void Trace::ExceptionHandled(Thread* thread ATTRIBUTE_UNUSED,
966                              Handle<mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
967     REQUIRES_SHARED(Locks::mutator_lock_) {
968   LOG(ERROR) << "Unexpected exception thrown event in tracing";
969 }
970 
Branch(Thread *,ArtMethod * method,uint32_t,int32_t)971 void Trace::Branch(Thread* /*thread*/, ArtMethod* method,
972                    uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/)
973       REQUIRES_SHARED(Locks::mutator_lock_) {
974   LOG(ERROR) << "Unexpected branch event in tracing" << ArtMethod::PrettyMethod(method);
975 }
976 
WatchedFramePop(Thread * self ATTRIBUTE_UNUSED,const ShadowFrame & frame ATTRIBUTE_UNUSED)977 void Trace::WatchedFramePop(Thread* self ATTRIBUTE_UNUSED,
978                             const ShadowFrame& frame ATTRIBUTE_UNUSED) {
979   LOG(ERROR) << "Unexpected WatchedFramePop event in tracing";
980 }
981 
ReadClocks(Thread * thread,uint32_t * thread_clock_diff,uint64_t * timestamp_counter)982 void Trace::ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint64_t* timestamp_counter) {
983   if (UseThreadCpuClock()) {
984     uint64_t clock_base = thread->GetTraceClockBase();
985     if (UNLIKELY(clock_base == 0)) {
986       // First event, record the base time in the map.
987       uint64_t time = thread->GetCpuMicroTime();
988       thread->SetTraceClockBase(time);
989     } else {
990       *thread_clock_diff = thread->GetCpuMicroTime() - clock_base;
991     }
992   }
993   if (UseWallClock()) {
994     *timestamp_counter = GetTimestamp();
995   }
996 }
997 
GetMethodLine(ArtMethod * method,uint32_t method_index)998 std::string Trace::GetMethodLine(ArtMethod* method, uint32_t method_index) {
999   method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
1000   return StringPrintf("%#x\t%s\t%s\t%s\t%s\n",
1001                       (method_index << TraceActionBits),
1002                       PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(),
1003                       method->GetName(),
1004                       method->GetSignature().ToString().c_str(),
1005                       method->GetDeclaringClassSourceFile());
1006 }
1007 
RecordStreamingMethodEvent(Thread * thread,ArtMethod * method,TraceAction action,uint32_t thread_clock_diff,uint64_t timestamp_counter)1008 void Trace::RecordStreamingMethodEvent(Thread* thread,
1009                                        ArtMethod* method,
1010                                        TraceAction action,
1011                                        uint32_t thread_clock_diff,
1012                                        uint64_t timestamp_counter) {
1013   uintptr_t* method_trace_buffer = thread->GetMethodTraceBuffer();
1014   size_t* current_offset = thread->GetMethodTraceIndexPtr();
1015   // Initialize the buffer lazily. It's just simpler to keep the creation at one place.
1016   if (method_trace_buffer == nullptr) {
1017     method_trace_buffer = new uintptr_t[std::max(kMinBufSize, kPerThreadBufSize)]();
1018     thread->SetMethodTraceBuffer(method_trace_buffer);
1019     *current_offset = 0;
1020 
1021     // This is the first event from this thread, so first record information about the thread.
1022     std::string thread_name;
1023     thread->GetThreadName(thread_name);
1024     static constexpr size_t kThreadNameHeaderSize = 7;
1025     uint8_t header[kThreadNameHeaderSize];
1026     Append2LE(header, 0);
1027     header[2] = kOpNewThread;
1028     // We use only 16 bits to encode thread id. On Android, we don't expect to use more than
1029     // 16-bits for a Tid. For 32-bit platforms it is always ensured we use less than 16 bits.
1030     // See  __check_max_thread_id in bionic for more details. Even on 64-bit the max threads
1031     // is currently less than 65536.
1032     // TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding
1033     // a map similar to method ids.
1034     DCHECK(!kIsTargetBuild || thread->GetTid() < (1 << 16));
1035     Append2LE(header + 3, static_cast<uint16_t>(thread->GetTid()));
1036     Append2LE(header + 5, static_cast<uint16_t>(thread_name.length()));
1037 
1038     {
1039       MutexLock mu(Thread::Current(), tracing_lock_);
1040       if (!trace_file_->WriteFully(header, kThreadNameHeaderSize) ||
1041           !trace_file_->WriteFully(reinterpret_cast<const uint8_t*>(thread_name.c_str()),
1042                                    thread_name.length())) {
1043         PLOG(WARNING) << "Failed streaming a tracing event.";
1044       }
1045     }
1046   }
1047 
1048   size_t required_entries = (clock_source_ == TraceClockSource::kDual) ? 4 : 3;
1049   if (*current_offset + required_entries >= kPerThreadBufSize) {
1050     // We don't have space for further entries. Flush the contents of the buffer and reuse the
1051     // buffer to store contents. Reset the index to the start of the buffer.
1052     FlushStreamingBuffer(thread);
1053     *current_offset = 0;
1054   }
1055 
1056   // Record entry in per-thread trace buffer.
1057   int current_index = *current_offset;
1058   method_trace_buffer[current_index++] = reinterpret_cast<uintptr_t>(method);
1059   // TODO(mythria): We only need two bits to record the action. Consider merging
1060   // it with the method entry to save space.
1061   method_trace_buffer[current_index++] = action;
1062   if (UseThreadCpuClock()) {
1063     method_trace_buffer[current_index++] = thread_clock_diff;
1064   }
1065   if (UseWallClock()) {
1066     if (art::kRuntimePointerSize == PointerSize::k32) {
1067       // On 32-bit architectures store timestamp counter as two 32-bit values.
1068       method_trace_buffer[current_index++] = timestamp_counter >> 32;
1069       method_trace_buffer[current_index++] = static_cast<uint32_t>(timestamp_counter);
1070     } else {
1071       method_trace_buffer[current_index++] = timestamp_counter;
1072     }
1073   }
1074   *current_offset = current_index;
1075 }
1076 
WriteToBuf(uint8_t * header,size_t header_size,const std::string & data,size_t * current_index,uint8_t * buffer,size_t buffer_size)1077 void Trace::WriteToBuf(uint8_t* header,
1078                        size_t header_size,
1079                        const std::string& data,
1080                        size_t* current_index,
1081                        uint8_t* buffer,
1082                        size_t buffer_size) {
1083   EnsureSpace(buffer, current_index, buffer_size, header_size);
1084   memcpy(buffer + *current_index, header, header_size);
1085   *current_index += header_size;
1086 
1087   EnsureSpace(buffer, current_index, buffer_size, data.length());
1088   if (data.length() < buffer_size) {
1089     memcpy(buffer + *current_index, reinterpret_cast<const uint8_t*>(data.c_str()), data.length());
1090     *current_index += data.length();
1091   } else {
1092     // The data is larger than buffer, so write directly to the file. EnsureSpace should have
1093     // flushed any data in the buffer.
1094     DCHECK_EQ(*current_index, 0U);
1095     if (!trace_file_->WriteFully(reinterpret_cast<const uint8_t*>(data.c_str()), data.length())) {
1096       PLOG(WARNING) << "Failed streaming a tracing event.";
1097     }
1098   }
1099 }
1100 
FlushStreamingBuffer(Thread * thread)1101 void Trace::FlushStreamingBuffer(Thread* thread) {
1102   // Take a tracing_lock_ to serialize writes across threads. We also need to allocate a unique
1103   // method id for each method. We do that by maintaining a map from id to method for each newly
1104   // seen method. tracing_lock_ is required to serialize these.
1105   MutexLock mu(Thread::Current(), tracing_lock_);
1106   uintptr_t* method_trace_buffer = thread->GetMethodTraceBuffer();
1107   // Create a temporary buffer to encode the trace events from the specified thread.
1108   size_t buffer_size = kPerThreadBufSize;
1109   size_t current_index = 0;
1110   std::unique_ptr<uint8_t[]> buffer(new uint8_t[std::max(kMinBufSize, buffer_size)]);
1111 
1112   size_t num_entries = *(thread->GetMethodTraceIndexPtr());
1113   for (size_t entry_index = 0; entry_index < num_entries;) {
1114     ArtMethod* method = reinterpret_cast<ArtMethod*>(method_trace_buffer[entry_index++]);
1115     TraceAction action = DecodeTraceAction(method_trace_buffer[entry_index++]);
1116     uint32_t thread_time = 0;
1117     uint32_t wall_time = 0;
1118     if (UseThreadCpuClock()) {
1119       thread_time = method_trace_buffer[entry_index++];
1120     }
1121     if (UseWallClock()) {
1122       uint64_t timestamp = method_trace_buffer[entry_index++];
1123       if (art::kRuntimePointerSize == PointerSize::k32) {
1124         // On 32-bit architectures timestamp is stored as two 32-bit values.
1125         timestamp = (timestamp << 32 | method_trace_buffer[entry_index++]);
1126       }
1127       wall_time = GetMicroTime(timestamp) - start_time_;
1128     }
1129 
1130     auto it = art_method_id_map_.find(method);
1131     uint32_t method_index = 0;
1132     // If we haven't seen this method before record information about the method.
1133     if (it == art_method_id_map_.end()) {
1134       art_method_id_map_.emplace(method, current_method_index_);
1135       method_index = current_method_index_;
1136       current_method_index_++;
1137       // Write a special block with the name.
1138       std::string method_line(GetMethodLine(method, method_index));
1139       static constexpr size_t kMethodNameHeaderSize = 5;
1140       uint8_t method_header[kMethodNameHeaderSize];
1141       DCHECK_LT(kMethodNameHeaderSize, kPerThreadBufSize);
1142       Append2LE(method_header, 0);
1143       method_header[2] = kOpNewMethod;
1144       Append2LE(method_header + 3, static_cast<uint16_t>(method_line.length()));
1145       WriteToBuf(method_header,
1146                  kMethodNameHeaderSize,
1147                  method_line,
1148                  &current_index,
1149                  buffer.get(),
1150                  buffer_size);
1151     } else {
1152       method_index = it->second;
1153     }
1154 
1155     const size_t record_size = GetRecordSize(clock_source_);
1156     DCHECK_LT(record_size, kPerThreadBufSize);
1157     EnsureSpace(buffer.get(), &current_index, buffer_size, record_size);
1158     EncodeEventEntry(
1159         buffer.get() + current_index, thread, method_index, action, thread_time, wall_time);
1160     current_index += record_size;
1161   }
1162 
1163   // Flush the contents of buffer to file.
1164   if (!trace_file_->WriteFully(buffer.get(), current_index)) {
1165     PLOG(WARNING) << "Failed streaming a tracing event.";
1166   }
1167 }
1168 
RecordMethodEvent(Thread * thread,ArtMethod * method,TraceAction action,uint32_t thread_clock_diff,uint64_t timestamp_counter)1169 void Trace::RecordMethodEvent(Thread* thread,
1170                               ArtMethod* method,
1171                               TraceAction action,
1172                               uint32_t thread_clock_diff,
1173                               uint64_t timestamp_counter) {
1174   // Advance cur_offset_ atomically.
1175   int32_t new_offset;
1176   int32_t old_offset = 0;
1177 
1178   // In the non-streaming case, we do a busy loop here trying to get
1179   // an offset to write our record and advance cur_offset_ for the
1180   // next use.
1181   // Although multiple threads can call this method concurrently,
1182   // the compare_exchange_weak here is still atomic (by definition).
1183   // A succeeding update is visible to other cores when they pass
1184   // through this point.
1185   old_offset = cur_offset_.load(std::memory_order_relaxed);  // Speculative read
1186   do {
1187     new_offset = old_offset + GetRecordSize(clock_source_);
1188     if (static_cast<size_t>(new_offset) > buffer_size_) {
1189       overflow_ = true;
1190       return;
1191     }
1192   } while (!cur_offset_.compare_exchange_weak(old_offset, new_offset, std::memory_order_relaxed));
1193 
1194   // Write data into the tracing buffer (if not streaming) or into a
1195   // small buffer on the stack (if streaming) which we'll put into the
1196   // tracing buffer below.
1197   //
1198   // These writes to the tracing buffer are synchronised with the
1199   // future reads that (only) occur under FinishTracing(). The callers
1200   // of FinishTracing() acquire locks and (implicitly) synchronise
1201   // the buffer memory.
1202   uint8_t* ptr;
1203   ptr = buf_.get() + old_offset;
1204   uint32_t wall_clock_diff = GetMicroTime(timestamp_counter) - start_time_;
1205   MutexLock mu(Thread::Current(), tracing_lock_);
1206   EncodeEventEntry(
1207       ptr, thread, EncodeTraceMethod(method), action, thread_clock_diff, wall_clock_diff);
1208 }
1209 
LogMethodTraceEvent(Thread * thread,ArtMethod * method,TraceAction action,uint32_t thread_clock_diff,uint64_t timestamp_counter)1210 void Trace::LogMethodTraceEvent(Thread* thread,
1211                                 ArtMethod* method,
1212                                 TraceAction action,
1213                                 uint32_t thread_clock_diff,
1214                                 uint64_t timestamp_counter) {
1215   // This method is called in both tracing modes (method and sampling). In sampling mode, this
1216   // method is only called by the sampling thread. In method tracing mode, it can be called
1217   // concurrently.
1218 
1219   // Ensure we always use the non-obsolete version of the method so that entry/exit events have the
1220   // same pointer value.
1221   method = method->GetNonObsoleteMethod();
1222 
1223   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
1224     RecordStreamingMethodEvent(thread, method, action, thread_clock_diff, timestamp_counter);
1225   } else {
1226     RecordMethodEvent(thread, method, action, thread_clock_diff, timestamp_counter);
1227   }
1228 }
1229 
EncodeEventEntry(uint8_t * ptr,Thread * thread,uint32_t method_index,TraceAction action,uint32_t thread_clock_diff,uint32_t wall_clock_diff)1230 void Trace::EncodeEventEntry(uint8_t* ptr,
1231                              Thread* thread,
1232                              uint32_t method_index,
1233                              TraceAction action,
1234                              uint32_t thread_clock_diff,
1235                              uint32_t wall_clock_diff) {
1236   static constexpr size_t kPacketSize = 14U;  // The maximum size of data in a packet.
1237   uint32_t method_value = (method_index << TraceActionBits) | action;
1238   Append2LE(ptr, thread->GetTid());
1239   Append4LE(ptr + 2, method_value);
1240   ptr += 6;
1241 
1242   if (UseThreadCpuClock()) {
1243     Append4LE(ptr, thread_clock_diff);
1244     ptr += 4;
1245   }
1246   if (UseWallClock()) {
1247     Append4LE(ptr, wall_clock_diff);
1248   }
1249   static_assert(kPacketSize == 2 + 4 + 4 + 4, "Packet size incorrect.");
1250 }
1251 
EnsureSpace(uint8_t * buffer,size_t * current_index,size_t buffer_size,size_t required_size)1252 void Trace::EnsureSpace(uint8_t* buffer,
1253                         size_t* current_index,
1254                         size_t buffer_size,
1255                         size_t required_size) {
1256   if (*current_index + required_size < buffer_size) {
1257     return;
1258   }
1259 
1260   if (!trace_file_->WriteFully(buffer, *current_index)) {
1261     PLOG(WARNING) << "Failed streaming a tracing event.";
1262   }
1263   *current_index = 0;
1264 }
1265 
DumpMethodList(std::ostream & os)1266 void Trace::DumpMethodList(std::ostream& os) {
1267   MutexLock mu(Thread::Current(), tracing_lock_);
1268   for (auto const& entry : art_method_id_map_) {
1269     os << GetMethodLine(entry.first, entry.second);
1270   }
1271 }
1272 
DumpThreadList(std::ostream & os)1273 void Trace::DumpThreadList(std::ostream& os) {
1274   for (const auto& it : threads_list_) {
1275     // We use only 16 bits to encode thread id. On Android, we don't expect to use more than
1276     // 16-bits for a Tid. For 32-bit platforms it is always ensured we use less than 16 bits.
1277     // See  __check_max_thread_id in bionic for more details. Even on 64-bit the max threads
1278     // is currently less than 65536.
1279     // TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding
1280     // a map similar to method ids.
1281     DCHECK(!kIsTargetBuild || it.first < (1 << 16));
1282     os << static_cast<uint16_t>(it.first) << "\t" << it.second << "\n";
1283   }
1284 }
1285 
StoreExitingThreadInfo(Thread * thread)1286 void Trace::StoreExitingThreadInfo(Thread* thread) {
1287   MutexLock mu(thread, *Locks::trace_lock_);
1288   if (the_trace_ != nullptr) {
1289     the_trace_->UpdateThreadsList(thread);
1290   }
1291 }
1292 
GetOutputMode()1293 Trace::TraceOutputMode Trace::GetOutputMode() {
1294   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
1295   CHECK(the_trace_ != nullptr) << "Trace output mode requested, but no trace currently running";
1296   return the_trace_->trace_output_mode_;
1297 }
1298 
GetMode()1299 Trace::TraceMode Trace::GetMode() {
1300   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
1301   CHECK(the_trace_ != nullptr) << "Trace mode requested, but no trace currently running";
1302   return the_trace_->trace_mode_;
1303 }
1304 
GetFlags()1305 int Trace::GetFlags() {
1306   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
1307   CHECK(the_trace_ != nullptr) << "Trace flags requested, but no trace currently running";
1308   return the_trace_->flags_;
1309 }
1310 
GetIntervalInMillis()1311 int Trace::GetIntervalInMillis() {
1312   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
1313   CHECK(the_trace_ != nullptr) << "Trace interval requested, but no trace currently running";
1314   return the_trace_->interval_us_;
1315 }
1316 
GetBufferSize()1317 size_t Trace::GetBufferSize() {
1318   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
1319   CHECK(the_trace_ != nullptr) << "Trace buffer size requested, but no trace currently running";
1320   return the_trace_->buffer_size_;
1321 }
1322 
IsTracingEnabled()1323 bool Trace::IsTracingEnabled() {
1324   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
1325   return the_trace_ != nullptr;
1326 }
1327 
1328 }  // namespace art
1329