1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/metrics/call_stacks/call_stack_profile_metrics_provider.h"
6
7 #include <utility>
8 #include <vector>
9
10 #include "base/check.h"
11 #include "base/feature_list.h"
12 #include "base/functional/bind.h"
13 #include "base/metrics/metrics_hashes.h"
14 #include "base/no_destructor.h"
15 #include "base/ranges/algorithm.h"
16 #include "base/synchronization/lock.h"
17 #include "base/thread_annotations.h"
18 #include "base/time/time.h"
19 #include "sampled_profile.pb.h"
20 #include "third_party/metrics_proto/chrome_user_metrics_extension.pb.h"
21
22 namespace metrics {
23
24 namespace {
25
26 constexpr base::FeatureState kSamplingProfilerReportingDefaultState =
27 base::FEATURE_ENABLED_BY_DEFAULT;
28
SamplingProfilerReportingEnabled()29 bool SamplingProfilerReportingEnabled() {
30 // TODO(crbug.com/1384179): Do not call this function before the FeatureList
31 // is registered.
32 if (!base::FeatureList::GetInstance()) {
33 // The FeatureList is not registered: use the feature's default state. This
34 // means that any override from the command line or variations service is
35 // ignored.
36 return kSamplingProfilerReportingDefaultState ==
37 base::FEATURE_ENABLED_BY_DEFAULT;
38 }
39 return base::FeatureList::IsEnabled(kSamplingProfilerReporting);
40 }
41
42 // Cap the number of pending profiles to avoid excessive performance overhead
43 // due to profile deserialization when profile uploads are delayed (e.g. due to
44 // being offline). Capping at this threshold loses approximately 0.5% of
45 // profiles on canary and dev.
46 //
47 // TODO(wittman): Remove this threshold after crbug.com/903972 is fixed.
constexpr size_t kMaxPendingProfiles = 1250;
49
50 // Provides access to the singleton interceptor callback instance for CPU
51 // profiles. Accessed asynchronously on the profiling thread after profiling has
52 // been started.
53 CallStackProfileMetricsProvider::InterceptorCallback&
GetCpuInterceptorCallbackInstance()54 GetCpuInterceptorCallbackInstance() {
55 static base::NoDestructor<
56 CallStackProfileMetricsProvider::InterceptorCallback>
57 instance;
58 return *instance;
59 }
60
61 // PendingProfiles ------------------------------------------------------------
62
63 // Singleton class responsible for retaining profiles received from
64 // CallStackProfileBuilder. These are then sent to UMA on the invocation of
65 // CallStackProfileMetricsProvider::ProvideCurrentSessionData(). We need to
66 // store the profiles outside of a CallStackProfileMetricsProvider instance
67 // since callers may start profiling before the CallStackProfileMetricsProvider
68 // is created.
69 //
70 // Member functions on this class may be called on any thread.
71 class PendingProfiles {
72 public:
73 static PendingProfiles* GetInstance();
74
75 PendingProfiles(const PendingProfiles&) = delete;
76 PendingProfiles& operator=(const PendingProfiles&) = delete;
77
78 // Retrieves all the pending profiles.
79 std::vector<SampledProfile> RetrieveProfiles();
80
81 // Enables the collection of profiles by MaybeCollect*Profile if |enabled| is
82 // true. Otherwise, clears the currently collected profiles and ignores
83 // profiles provided to future invocations of MaybeCollect*Profile.
84 void SetCollectionEnabled(bool enabled);
85
86 // Collects |profile|. It may be stored in a serialized form, or ignored,
87 // depending on the pre-defined storage capacity and whether collection is
88 // enabled. |profile| is not const& because it must be passed with std::move.
89 void MaybeCollectProfile(base::TimeTicks profile_start_time,
90 SampledProfile profile);
91
92 // Collects |serialized_profile|. It may be ignored depending on the
93 // pre-defined storage capacity and whether collection is enabled.
94 // |serialized_profile| must be passed with std::move because it could be very
95 // large.
96 void MaybeCollectSerializedProfile(base::TimeTicks profile_start_time,
97 std::string&& serialized_profile);
98
99 #if BUILDFLAG(IS_CHROMEOS)
100 // Returns all the serialized profiles that have been collected but not yet
101 // retrieved. For thread-safety reasons, returns a copy, so this is an
102 // expensive function. Fortunately, it's only called during ChromeOS tast
103 // integration tests.
GetUnretrievedProfiles()104 std::vector<std::string> GetUnretrievedProfiles() {
105 base::AutoLock scoped_lock(lock_);
106 return serialized_profiles_;
107 }
108 #endif // BUILDFLAG(IS_CHROMEOS)
109
110 // Allows testing against the initial state multiple times.
111 void ResetToDefaultStateForTesting();
112
113 private:
114 friend class base::NoDestructor<PendingProfiles>;
115
116 PendingProfiles();
117 ~PendingProfiles() = delete;
118
119 // Returns true if collection is enabled for a given profile based on its
120 // |profile_start_time|. The |lock_| must be held prior to calling this
121 // method.
122 bool IsCollectionEnabledForProfile(base::TimeTicks profile_start_time) const
123 EXCLUSIVE_LOCKS_REQUIRED(lock_);
124
125 mutable base::Lock lock_;
126
127 // If true, profiles provided to MaybeCollect*Profile should be collected.
128 // Otherwise they will be ignored.
129 // |collection_enabled_| is initialized to true to collect any profiles that
130 // are generated prior to creation of the CallStackProfileMetricsProvider.
131 // The ultimate disposition of these pre-creation collected profiles will be
132 // determined by the initial recording state provided to
133 // CallStackProfileMetricsProvider.
134 bool collection_enabled_ GUARDED_BY(lock_) = true;
135
136 // The last time collection was disabled. Used to determine if collection was
137 // disabled at any point since a profile was started.
138 base::TimeTicks last_collection_disable_time_ GUARDED_BY(lock_);
139
140 // The last time collection was enabled. Used to determine if collection was
141 // enabled at any point since a profile was started.
142 base::TimeTicks last_collection_enable_time_ GUARDED_BY(lock_);
143
144 // The set of completed serialized profiles that should be reported.
145 std::vector<std::string> serialized_profiles_ GUARDED_BY(lock_);
146 };
147
FindHashNameIndexInProfile(const SampledProfile & profile,const uint64_t name_hash)148 absl::optional<int32_t> FindHashNameIndexInProfile(
149 const SampledProfile& profile,
150 const uint64_t name_hash) {
151 const auto& name_hashes = profile.call_stack_profile().metadata_name_hash();
152 const auto loc = base::ranges::find(name_hashes, name_hash);
153 if (loc == name_hashes.end()) {
154 return absl::nullopt;
155 }
156 return loc - name_hashes.begin();
157 }
158
159 // Remove temp profile metadata for LCP tagging.
RemoveTempLCPMetadata(SampledProfile & profile)160 void RemoveTempLCPMetadata(SampledProfile& profile) {
161 const uint64_t nav_start_name_hash =
162 base::HashMetricName("Internal.LargestContentfulPaint.NavigationStart");
163 const uint64_t document_token_name_hash =
164 base::HashMetricName("Internal.LargestContentfulPaint.DocumentToken");
165
166 absl::optional<int32_t> navigation_start_name_hash_index =
167 FindHashNameIndexInProfile(profile, nav_start_name_hash);
168 absl::optional<int32_t> document_token_name_hash_index =
169 FindHashNameIndexInProfile(profile, document_token_name_hash);
170
171 // Remove profile_metadata items.
172 auto* profile_metadata =
173 profile.mutable_call_stack_profile()->mutable_profile_metadata();
174 profile_metadata->erase(
175 base::ranges::remove_if(
176 *profile_metadata,
177 [&](const CallStackProfile_MetadataItem& item) {
178 return item.name_hash_index() == navigation_start_name_hash_index ||
179 item.name_hash_index() == document_token_name_hash_index;
180 }),
181 profile_metadata->end());
182
183 // Remove name hashes
184 auto* name_hashes =
185 profile.mutable_call_stack_profile()->mutable_metadata_name_hash();
186 name_hashes->erase(
187 base::ranges::remove_if(*name_hashes,
188 [&](uint64_t name_hash) {
189 return name_hash == nav_start_name_hash ||
190 name_hash == document_token_name_hash;
191 }),
192 name_hashes->end());
193
194 // Update name_hash_index of all MetadataItem.
195 const auto shift_index = [&](CallStackProfile_MetadataItem& item) {
196 int64_t offset = 0;
197 if (navigation_start_name_hash_index.has_value() &&
198 item.name_hash_index() > *navigation_start_name_hash_index) {
199 offset++;
200 }
201 if (document_token_name_hash_index.has_value() &&
202 item.name_hash_index() > *document_token_name_hash_index) {
203 offset++;
204 }
205
206 item.set_name_hash_index(item.name_hash_index() - offset);
207 };
208
209 base::ranges::for_each(*profile_metadata, shift_index);
210 for (auto& stack_sample :
211 *profile.mutable_call_stack_profile()->mutable_stack_sample()) {
212 base::ranges::for_each(*stack_sample.mutable_metadata(), shift_index);
213 }
214
215 // Remove timestamps
216 for (auto& stack_sample :
217 *profile.mutable_call_stack_profile()->mutable_stack_sample()) {
218 stack_sample.clear_sample_time_offset_ms();
219 }
220
221 if (profile.trigger_event() != SampledProfile::PERIODIC_HEAP_COLLECTION) {
222 profile.mutable_call_stack_profile()->clear_profile_time_offset_ms();
223 }
224 }
225
226 // static
GetInstance()227 PendingProfiles* PendingProfiles::GetInstance() {
228 // Singleton for performance rather than correctness reasons.
229 static base::NoDestructor<PendingProfiles> instance;
230 return instance.get();
231 }
232
RetrieveProfiles()233 std::vector<SampledProfile> PendingProfiles::RetrieveProfiles() {
234 std::vector<std::string> serialized_profiles;
235
236 {
237 base::AutoLock scoped_lock(lock_);
238 serialized_profiles.swap(serialized_profiles_);
239 }
240
241 // Deserialize all serialized profiles, skipping over any that fail to parse.
242 std::vector<SampledProfile> profiles;
243 profiles.reserve(serialized_profiles.size());
244 for (const auto& serialized_profile : serialized_profiles) {
245 SampledProfile profile;
246 if (profile.ParseFromString(serialized_profile)) {
247 profiles.push_back(std::move(profile));
248 }
249 }
250
251 return profiles;
252 }
253
SetCollectionEnabled(bool enabled)254 void PendingProfiles::SetCollectionEnabled(bool enabled) {
255 base::AutoLock scoped_lock(lock_);
256
257 collection_enabled_ = enabled;
258
259 if (!collection_enabled_) {
260 serialized_profiles_.clear();
261 last_collection_disable_time_ = base::TimeTicks::Now();
262 } else {
263 last_collection_enable_time_ = base::TimeTicks::Now();
264 }
265 }
266
IsCollectionEnabledForProfile(base::TimeTicks profile_start_time) const267 bool PendingProfiles::IsCollectionEnabledForProfile(
268 base::TimeTicks profile_start_time) const {
269 lock_.AssertAcquired();
270
271 // Scenario 1: return false if collection is disabled.
272 if (!collection_enabled_)
273 return false;
274
275 // Scenario 2: return false if collection is disabled after the start of
276 // collection for this profile.
277 if (!last_collection_disable_time_.is_null() &&
278 last_collection_disable_time_ >= profile_start_time) {
279 return false;
280 }
281
282 // Scenario 3: return false if collection is disabled before the start of
283 // collection and re-enabled after the start. Note that this is different from
284 // scenario 1 where re-enabling never happens.
285 if (!last_collection_disable_time_.is_null() &&
286 !last_collection_enable_time_.is_null() &&
287 last_collection_enable_time_ >= profile_start_time) {
288 return false;
289 }
290
291 return true;
292 }
293
MaybeCollectProfile(base::TimeTicks profile_start_time,SampledProfile profile)294 void PendingProfiles::MaybeCollectProfile(base::TimeTicks profile_start_time,
295 SampledProfile profile) {
296 {
297 base::AutoLock scoped_lock(lock_);
298
299 if (!IsCollectionEnabledForProfile(profile_start_time))
300 return;
301 }
302
303 // Serialize the profile without holding the lock.
304 std::string serialized_profile;
305 profile.SerializeToString(&serialized_profile);
306
307 MaybeCollectSerializedProfile(profile_start_time,
308 std::move(serialized_profile));
309 }
310
MaybeCollectSerializedProfile(base::TimeTicks profile_start_time,std::string && serialized_profile)311 void PendingProfiles::MaybeCollectSerializedProfile(
312 base::TimeTicks profile_start_time,
313 std::string&& serialized_profile) {
314 base::AutoLock scoped_lock(lock_);
315
316 // There is no room for additional profiles.
317 if (serialized_profiles_.size() >= kMaxPendingProfiles)
318 return;
319
320 if (IsCollectionEnabledForProfile(profile_start_time))
321 serialized_profiles_.push_back(std::move(serialized_profile));
322 }
323
ResetToDefaultStateForTesting()324 void PendingProfiles::ResetToDefaultStateForTesting() {
325 base::AutoLock scoped_lock(lock_);
326
327 collection_enabled_ = true;
328 last_collection_disable_time_ = base::TimeTicks();
329 last_collection_enable_time_ = base::TimeTicks();
330 serialized_profiles_.clear();
331 }
332
333 PendingProfiles::PendingProfiles() = default;
334
335 #if BUILDFLAG(IS_CHROMEOS)
336 // A class that records the number of minimally-successful profiles received
337 // over time. In ChromeOS, this is used by the ui.StackSampledMetrics tast
338 // integration test to confirm that stack-sampled metrics are working on
339 // all the various ChromeOS boards.
340 class ReceivedProfileCounter {
341 public:
342 static ReceivedProfileCounter* GetInstance();
343
344 ReceivedProfileCounter(const ReceivedProfileCounter&) = delete;
345 ReceivedProfileCounter& operator=(const ReceivedProfileCounter&) = delete;
346 ~ReceivedProfileCounter() = delete;
347
348 // Gets the counts of all successfully collected profiles, broken down by
349 // process type and thread type. "Successfully collected" is defined pretty
350 // minimally (we got a couple of frames).
351 CallStackProfileMetricsProvider::ProcessThreadCount
352 GetSuccessfullyCollectedCounts();
353
354 // Given a list of profiles returned from PendingProfiles::RetrieveProfiles(),
355 // add counts from all the successful profiles in the list to our counts for
356 // later.
357 void OnRetrieveProfiles(const std::vector<SampledProfile>& profiles);
358
359 // Allows testing against the initial state multiple times.
360 void ResetToDefaultStateForTesting(); // IN-TEST
361
362 private:
363 friend class base::NoDestructor<ReceivedProfileCounter>;
364
365 ReceivedProfileCounter() = default;
366
367 // Returns true if the given profile was success enough to be counted in
368 // retrieved_successful_counts_.
369 static bool WasMinimallySuccessful(const SampledProfile& profile);
370
371 mutable base::Lock lock_;
372
373 // Count of successfully-stack-walked SampledProfiles retrieved since startup.
374 // "success" is defined by WasMinimallySuccessful().
375 CallStackProfileMetricsProvider::ProcessThreadCount
376 retrieved_successful_counts_ GUARDED_BY(lock_);
377 };
378
379 // static
GetInstance()380 ReceivedProfileCounter* ReceivedProfileCounter::GetInstance() {
381 static base::NoDestructor<ReceivedProfileCounter> instance;
382 return instance.get();
383 }
384
385 // static
WasMinimallySuccessful(const SampledProfile & profile)386 bool ReceivedProfileCounter::WasMinimallySuccessful(
387 const SampledProfile& profile) {
388 // If we don't have a process or thread, we don't understand the profile.
389 if (!profile.has_process() || !profile.has_thread()) {
390 return false;
391 }
392
393 // Since we can't symbolize the stacks, "successful" here just means that the
394 // stack has at least 2 frames. (The current instruction pointer should always
395 // count as one, so two means we had some luck walking the stack.)
396 const auto& stacks = profile.call_stack_profile().stack();
397 return base::ranges::find_if(stacks,
398 [](const CallStackProfile::Stack& stack) {
399 return stack.frame_size() >= 2;
400 }) != stacks.end();
401 }
402
OnRetrieveProfiles(const std::vector<SampledProfile> & profiles)403 void ReceivedProfileCounter::OnRetrieveProfiles(
404 const std::vector<SampledProfile>& profiles) {
405 base::AutoLock scoped_lock(lock_);
406 for (const auto& profile : profiles) {
407 if (WasMinimallySuccessful(profile)) {
408 ++retrieved_successful_counts_[profile.process()][profile.thread()];
409 }
410 }
411 }
412
413 CallStackProfileMetricsProvider::ProcessThreadCount
GetSuccessfullyCollectedCounts()414 ReceivedProfileCounter::GetSuccessfullyCollectedCounts() {
415 CallStackProfileMetricsProvider::ProcessThreadCount successful_counts;
416
417 {
418 base::AutoLock scoped_lock(lock_);
419 // Start with count of profiles we've already sent
420 successful_counts = retrieved_successful_counts_;
421 }
422
423 // And then add in any pending ones. Copying and then deserializing all the
424 // profiles is expensive, but again, this should only be called during tast
425 // integration tests.
426 std::vector<std::string> unretrieved_profiles(
427 PendingProfiles::GetInstance()->GetUnretrievedProfiles());
428 for (const std::string& serialized_profile : unretrieved_profiles) {
429 SampledProfile profile;
430 if (profile.ParseFromString(serialized_profile)) {
431 if (WasMinimallySuccessful(profile)) {
432 ++successful_counts[profile.process()][profile.thread()];
433 }
434 }
435 }
436
437 return successful_counts;
438 }
439
ResetToDefaultStateForTesting()440 void ReceivedProfileCounter::ResetToDefaultStateForTesting() {
441 base::AutoLock scoped_lock(lock_);
442 retrieved_successful_counts_.clear();
443 }
444
445 #endif // BUILDFLAG(IS_CHROMEOS)
446 } // namespace
447
448 // CallStackProfileMetricsProvider --------------------------------------------
449
450 BASE_FEATURE(kSamplingProfilerReporting,
451 "SamplingProfilerReporting",
452 kSamplingProfilerReportingDefaultState);
453
454 CallStackProfileMetricsProvider::CallStackProfileMetricsProvider() = default;
455 CallStackProfileMetricsProvider::~CallStackProfileMetricsProvider() = default;
456
457 // static
ReceiveProfile(base::TimeTicks profile_start_time,SampledProfile profile)458 void CallStackProfileMetricsProvider::ReceiveProfile(
459 base::TimeTicks profile_start_time,
460 SampledProfile profile) {
461 if (GetCpuInterceptorCallbackInstance() &&
462 (profile.trigger_event() == SampledProfile::PROCESS_STARTUP ||
463 profile.trigger_event() == SampledProfile::PERIODIC_COLLECTION)) {
464 GetCpuInterceptorCallbackInstance().Run(std::move(profile));
465 return;
466 }
467
468 if (profile.trigger_event() != SampledProfile::PERIODIC_HEAP_COLLECTION &&
469 !SamplingProfilerReportingEnabled()) {
470 return;
471 }
472 PendingProfiles::GetInstance()->MaybeCollectProfile(profile_start_time,
473 std::move(profile));
474 }
475
476 // static
ReceiveSerializedProfile(base::TimeTicks profile_start_time,bool is_heap_profile,std::string && serialized_profile)477 void CallStackProfileMetricsProvider::ReceiveSerializedProfile(
478 base::TimeTicks profile_start_time,
479 bool is_heap_profile,
480 std::string&& serialized_profile) {
481 // Note: All parameters of this function come from a Mojo message from an
482 // untrusted process.
483 if (GetCpuInterceptorCallbackInstance()) {
484 // GetCpuInterceptorCallbackInstance() is set only in tests, so it's safe to
485 // trust `is_heap_profile` and `serialized_profile` here.
486 DCHECK(!is_heap_profile);
487 SampledProfile profile;
488 if (profile.ParseFromString(serialized_profile)) {
489 DCHECK(profile.trigger_event() == SampledProfile::PROCESS_STARTUP ||
490 profile.trigger_event() == SampledProfile::PERIODIC_COLLECTION);
491 GetCpuInterceptorCallbackInstance().Run(std::move(profile));
492 }
493 return;
494 }
495
496 // If an attacker spoofs `is_heap_profile` or `profile_start_time`, the worst
497 // they can do is cause `serialized_profile` to be sent to UMA when profile
498 // reporting should be disabled.
499 if (!is_heap_profile && !SamplingProfilerReportingEnabled()) {
500 return;
501 }
502 PendingProfiles::GetInstance()->MaybeCollectSerializedProfile(
503 profile_start_time, std::move(serialized_profile));
504 }
505
506 // static
SetCpuInterceptorCallbackForTesting(InterceptorCallback callback)507 void CallStackProfileMetricsProvider::SetCpuInterceptorCallbackForTesting(
508 InterceptorCallback callback) {
509 GetCpuInterceptorCallbackInstance() = std::move(callback);
510 }
511
512 #if BUILDFLAG(IS_CHROMEOS)
513 // static
514 CallStackProfileMetricsProvider::ProcessThreadCount
GetSuccessfullyCollectedCounts()515 CallStackProfileMetricsProvider::GetSuccessfullyCollectedCounts() {
516 return ReceivedProfileCounter::GetInstance()
517 ->GetSuccessfullyCollectedCounts();
518 }
519 #endif
520
OnRecordingEnabled()521 void CallStackProfileMetricsProvider::OnRecordingEnabled() {
522 PendingProfiles::GetInstance()->SetCollectionEnabled(true);
523 }
524
OnRecordingDisabled()525 void CallStackProfileMetricsProvider::OnRecordingDisabled() {
526 PendingProfiles::GetInstance()->SetCollectionEnabled(false);
527 }
528
ProvideCurrentSessionData(ChromeUserMetricsExtension * uma_proto)529 void CallStackProfileMetricsProvider::ProvideCurrentSessionData(
530 ChromeUserMetricsExtension* uma_proto) {
531 std::vector<SampledProfile> profiles =
532 PendingProfiles::GetInstance()->RetrieveProfiles();
533 #if BUILDFLAG(IS_CHROMEOS)
534 ReceivedProfileCounter::GetInstance()->OnRetrieveProfiles(profiles);
535 #endif
536
537 for (auto& profile : profiles) {
538 // Only heap samples should ever be received if SamplingProfilerReporting is
539 // disabled.
540 DCHECK(SamplingProfilerReportingEnabled() ||
541 profile.trigger_event() == SampledProfile::PERIODIC_HEAP_COLLECTION);
542 RemoveTempLCPMetadata(profile);
543 *uma_proto->add_sampled_profile() = std::move(profile);
544 }
545 }
546
547 // static
ResetStaticStateForTesting()548 void CallStackProfileMetricsProvider::ResetStaticStateForTesting() {
549 PendingProfiles::GetInstance()->ResetToDefaultStateForTesting();
550 #if BUILDFLAG(IS_CHROMEOS)
551 ReceivedProfileCounter::GetInstance()
552 ->ResetToDefaultStateForTesting(); // IN-TEST
553 #endif
554 }
555
556 } // namespace metrics
557