• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "metrics_collector.h"
18 
19 #include <sysexits.h>
20 #include <time.h>
21 
22 #include <memory>
23 
24 #include <base/bind.h>
25 #include <base/files/file_path.h>
26 #include <base/files/file_util.h>
27 #include <base/hash.h>
28 #include <base/logging.h>
29 #include <base/strings/string_number_conversions.h>
30 #include <base/strings/string_split.h>
31 #include <base/strings/string_util.h>
32 #include <base/strings/stringprintf.h>
33 #include <brillo/binder_watcher.h>
34 #include <brillo/osrelease_reader.h>
35 
36 #include "constants.h"
37 #include "metrics_collector_service_impl.h"
38 
39 using base::FilePath;
40 using base::StringPrintf;
41 using base::Time;
42 using base::TimeDelta;
43 using base::TimeTicks;
44 using chromeos_metrics::PersistentInteger;
45 using std::map;
46 using std::string;
47 using std::vector;
48 
namespace {

// Calendar arithmetic, in whole units.
constexpr int kSecondsPerMinute = 60;
constexpr int kMinutesPerHour = 60;
constexpr int kHoursPerDay = 24;
constexpr int kDaysPerWeek = 7;
constexpr int kMinutesPerDay = kMinutesPerHour * kHoursPerDay;
constexpr int kSecondsPerDay = kMinutesPerDay * kSecondsPerMinute;
constexpr int kSecondsPerWeek = kDaysPerWeek * kSecondsPerDay;

// Delay, in milliseconds, between successive UpdateStats() runs.
constexpr uint32_t kUpdateStatsIntervalMs = 300000;

// Marker files probed with CheckSystemCrash() when the daemon starts.
constexpr char kKernelCrashDetectedFile[] =
    "/data/misc/crash_reporter/run/kernel-crash-detected";
constexpr char kUncleanShutdownDetectedFile[] =
    "/var/run/unclean-shutdown-detected";

// Delay, in seconds, between meminfo samples.
constexpr int kMetricMeminfoInterval = 30;

// Kernel-provided statistics files.
constexpr char kMeminfoFileName[] = "/proc/meminfo";
constexpr char kVmStatFileName[] = "/proc/vmstat";

// Weave component and trait under which the metrics commands and state are
// registered.
constexpr char kWeaveComponent[] = "metrics";
constexpr char kWeaveTrait[] = "_metrics";

}  // namespace
76 
77 // Zram sysfs entries.
78 
79 const char MetricsCollector::kComprDataSizeName[] = "compr_data_size";
80 const char MetricsCollector::kOrigDataSizeName[] = "orig_data_size";
81 const char MetricsCollector::kZeroPagesName[] = "zero_pages";
82 
83 // Memory use stats collection intervals.  We collect some memory use interval
84 // at these intervals after boot, and we stop collecting after the last one,
85 // with the assumption that in most cases the memory use won't change much
86 // after that.
87 static const int kMemuseIntervals[] = {
88   1 * kSecondsPerMinute,    // 1 minute mark
89   4 * kSecondsPerMinute,    // 5 minute mark
90   25 * kSecondsPerMinute,   // 0.5 hour mark
91   120 * kSecondsPerMinute,  // 2.5 hour mark
92   600 * kSecondsPerMinute,  // 12.5 hour mark
93 };
94 
MetricsCollector()95 MetricsCollector::MetricsCollector()
96     : memuse_final_time_(0),
97       memuse_interval_index_(0) {}
98 
~MetricsCollector()99 MetricsCollector::~MetricsCollector() {
100 }
101 
102 // static
GetActiveTime()103 double MetricsCollector::GetActiveTime() {
104   struct timespec ts;
105   int r = clock_gettime(CLOCK_MONOTONIC, &ts);
106   if (r < 0) {
107     PLOG(WARNING) << "clock_gettime(CLOCK_MONOTONIC) failed";
108     return 0;
109   } else {
110     return ts.tv_sec + static_cast<double>(ts.tv_nsec) / (1000 * 1000 * 1000);
111   }
112 }
113 
Run()114 int MetricsCollector::Run() {
115   if (CheckSystemCrash(kKernelCrashDetectedFile)) {
116     ProcessKernelCrash();
117   }
118 
119   if (CheckSystemCrash(kUncleanShutdownDetectedFile)) {
120     ProcessUncleanShutdown();
121   }
122 
123   // On OS version change, clear version stats (which are reported daily).
124   int32_t version = GetOsVersionHash();
125   if (version_cycle_->Get() != version) {
126     version_cycle_->Set(version);
127     kernel_crashes_version_count_->Set(0);
128     version_cumulative_active_use_->Set(0);
129     version_cumulative_cpu_use_->Set(0);
130   }
131 
132   // Start metricscollectorservice
133   android::sp<BnMetricsCollectorServiceImpl> metrics_collector_service =
134       new BnMetricsCollectorServiceImpl(this);
135   android::status_t status = android::defaultServiceManager()->addService(
136       metrics_collector_service->getInterfaceDescriptor(),
137       metrics_collector_service);
138   CHECK(status == android::OK)
139       << "failed to register service metricscollectorservice";
140 
141   // Watch Binder events in the main loop
142   brillo::BinderWatcher binder_watcher;
143   CHECK(binder_watcher.Init()) << "Binder FD watcher init failed";
144   return brillo::Daemon::Run();
145 }
146 
GetOsVersionHash()147 uint32_t MetricsCollector::GetOsVersionHash() {
148   brillo::OsReleaseReader reader;
149   reader.Load();
150   string version;
151   if (!reader.GetString(metrics::kProductVersion, &version)) {
152     LOG(ERROR) << "failed to read the product version.";
153     version = metrics::kDefaultVersion;
154   }
155 
156   uint32_t version_hash = base::Hash(version);
157   if (testing_) {
158     version_hash = 42;  // return any plausible value for the hash
159   }
160   return version_hash;
161 }
162 
Init(bool testing,MetricsLibraryInterface * metrics_lib,const string & diskstats_path,const base::FilePath & private_metrics_directory,const base::FilePath & shared_metrics_directory)163 void MetricsCollector::Init(bool testing, MetricsLibraryInterface* metrics_lib,
164                             const string& diskstats_path,
165                             const base::FilePath& private_metrics_directory,
166                             const base::FilePath& shared_metrics_directory) {
167   CHECK(metrics_lib);
168   testing_ = testing;
169   shared_metrics_directory_ = shared_metrics_directory;
170   metrics_lib_ = metrics_lib;
171 
172   daily_active_use_.reset(new PersistentInteger("Platform.UseTime.PerDay",
173                                                 private_metrics_directory));
174   version_cumulative_active_use_.reset(new PersistentInteger(
175       "Platform.CumulativeUseTime", private_metrics_directory));
176   version_cumulative_cpu_use_.reset(new PersistentInteger(
177       "Platform.CumulativeCpuTime", private_metrics_directory));
178 
179   kernel_crash_interval_.reset(new PersistentInteger(
180       "Platform.KernelCrashInterval", private_metrics_directory));
181   unclean_shutdown_interval_.reset(new PersistentInteger(
182       "Platform.UncleanShutdownInterval", private_metrics_directory));
183   user_crash_interval_.reset(new PersistentInteger("Platform.UserCrashInterval",
184                                                    private_metrics_directory));
185 
186   any_crashes_daily_count_.reset(new PersistentInteger(
187       "Platform.AnyCrashes.PerDay", private_metrics_directory));
188   any_crashes_weekly_count_.reset(new PersistentInteger(
189       "Platform.AnyCrashes.PerWeek", private_metrics_directory));
190   user_crashes_daily_count_.reset(new PersistentInteger(
191       "Platform.UserCrashes.PerDay", private_metrics_directory));
192   user_crashes_weekly_count_.reset(new PersistentInteger(
193       "Platform.UserCrashes.PerWeek", private_metrics_directory));
194   kernel_crashes_daily_count_.reset(new PersistentInteger(
195       "Platform.KernelCrashes.PerDay", private_metrics_directory));
196   kernel_crashes_weekly_count_.reset(new PersistentInteger(
197       "Platform.KernelCrashes.PerWeek", private_metrics_directory));
198   kernel_crashes_version_count_.reset(new PersistentInteger(
199       "Platform.KernelCrashesSinceUpdate", private_metrics_directory));
200   unclean_shutdowns_daily_count_.reset(new PersistentInteger(
201       "Platform.UncleanShutdown.PerDay", private_metrics_directory));
202   unclean_shutdowns_weekly_count_.reset(new PersistentInteger(
203       "Platform.UncleanShutdowns.PerWeek", private_metrics_directory));
204 
205   daily_cycle_.reset(
206       new PersistentInteger("daily.cycle", private_metrics_directory));
207   weekly_cycle_.reset(
208       new PersistentInteger("weekly.cycle", private_metrics_directory));
209   version_cycle_.reset(
210       new PersistentInteger("version.cycle", private_metrics_directory));
211 
212   disk_usage_collector_.reset(new DiskUsageCollector(metrics_lib_));
213   averaged_stats_collector_.reset(
214       new AveragedStatisticsCollector(metrics_lib_, diskstats_path,
215                                       kVmStatFileName));
216   cpu_usage_collector_.reset(new CpuUsageCollector(metrics_lib_));
217 }
218 
OnInit()219 int MetricsCollector::OnInit() {
220   int return_code = brillo::Daemon::OnInit();
221   if (return_code != EX_OK)
222     return return_code;
223 
224   StatsReporterInit();
225 
226   // Start collecting meminfo stats.
227   ScheduleMeminfoCallback(kMetricMeminfoInterval);
228   memuse_final_time_ = GetActiveTime() + kMemuseIntervals[0];
229   ScheduleMemuseCallback(kMemuseIntervals[0]);
230 
231   if (testing_)
232     return EX_OK;
233 
234   weave_service_subscription_ = weaved::Service::Connect(
235       brillo::MessageLoop::current(),
236       base::Bind(&MetricsCollector::OnWeaveServiceConnected,
237                  weak_ptr_factory_.GetWeakPtr()));
238 
239   latest_cpu_use_microseconds_ = cpu_usage_collector_->GetCumulativeCpuUse();
240   base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
241       base::Bind(&MetricsCollector::HandleUpdateStatsTimeout,
242                  weak_ptr_factory_.GetWeakPtr()),
243       base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs));
244 
245   return EX_OK;
246 }
247 
OnWeaveServiceConnected(const std::weak_ptr<weaved::Service> & service)248 void MetricsCollector::OnWeaveServiceConnected(
249     const std::weak_ptr<weaved::Service>& service) {
250   service_ = service;
251   auto weave_service = service_.lock();
252   if (!weave_service)
253     return;
254 
255   weave_service->AddComponent(kWeaveComponent, {kWeaveTrait}, nullptr);
256   weave_service->AddCommandHandler(
257       kWeaveComponent, kWeaveTrait, "enableAnalyticsReporting",
258       base::Bind(&MetricsCollector::OnEnableMetrics,
259                  weak_ptr_factory_.GetWeakPtr()));
260   weave_service->AddCommandHandler(
261       kWeaveComponent, kWeaveTrait, "disableAnalyticsReporting",
262       base::Bind(&MetricsCollector::OnDisableMetrics,
263                  weak_ptr_factory_.GetWeakPtr()));
264 
265   UpdateWeaveState();
266 }
267 
OnEnableMetrics(std::unique_ptr<weaved::Command> command)268 void MetricsCollector::OnEnableMetrics(
269     std::unique_ptr<weaved::Command> command) {
270   if (base::WriteFile(
271           shared_metrics_directory_.Append(metrics::kConsentFileName), "", 0) !=
272       0) {
273     PLOG(ERROR) << "Could not create the consent file.";
274     command->Abort("metrics_error", "Could not create the consent file",
275                    nullptr);
276     return;
277   }
278 
279   UpdateWeaveState();
280   command->Complete({}, nullptr);
281 }
282 
OnDisableMetrics(std::unique_ptr<weaved::Command> command)283 void MetricsCollector::OnDisableMetrics(
284     std::unique_ptr<weaved::Command> command) {
285   if (!base::DeleteFile(
286           shared_metrics_directory_.Append(metrics::kConsentFileName), false)) {
287     PLOG(ERROR) << "Could not delete the consent file.";
288     command->Abort("metrics_error", "Could not delete the consent file",
289                    nullptr);
290     return;
291   }
292 
293   UpdateWeaveState();
294   command->Complete({}, nullptr);
295 }
296 
UpdateWeaveState()297 void MetricsCollector::UpdateWeaveState() {
298   auto weave_service = service_.lock();
299   if (!weave_service)
300     return;
301 
302   std::string enabled =
303       metrics_lib_->AreMetricsEnabled() ? "enabled" : "disabled";
304 
305   if (!weave_service->SetStateProperty(kWeaveComponent, kWeaveTrait,
306                                        "analyticsReportingState",
307                                        *brillo::ToValue(enabled),
308                                        nullptr)) {
309     LOG(ERROR) << "failed to update weave's state";
310   }
311 }
312 
ProcessUserCrash()313 void MetricsCollector::ProcessUserCrash() {
314   // Counts the active time up to now.
315   UpdateStats(TimeTicks::Now(), Time::Now());
316 
317   // Reports the active use time since the last crash and resets it.
318   SendAndResetCrashIntervalSample(user_crash_interval_);
319 
320   any_crashes_daily_count_->Add(1);
321   any_crashes_weekly_count_->Add(1);
322   user_crashes_daily_count_->Add(1);
323   user_crashes_weekly_count_->Add(1);
324 }
325 
ProcessKernelCrash()326 void MetricsCollector::ProcessKernelCrash() {
327   // Counts the active time up to now.
328   UpdateStats(TimeTicks::Now(), Time::Now());
329 
330   // Reports the active use time since the last crash and resets it.
331   SendAndResetCrashIntervalSample(kernel_crash_interval_);
332 
333   any_crashes_daily_count_->Add(1);
334   any_crashes_weekly_count_->Add(1);
335   kernel_crashes_daily_count_->Add(1);
336   kernel_crashes_weekly_count_->Add(1);
337 
338   kernel_crashes_version_count_->Add(1);
339 }
340 
ProcessUncleanShutdown()341 void MetricsCollector::ProcessUncleanShutdown() {
342   // Counts the active time up to now.
343   UpdateStats(TimeTicks::Now(), Time::Now());
344 
345   // Reports the active use time since the last crash and resets it.
346   SendAndResetCrashIntervalSample(unclean_shutdown_interval_);
347 
348   unclean_shutdowns_daily_count_->Add(1);
349   unclean_shutdowns_weekly_count_->Add(1);
350   any_crashes_daily_count_->Add(1);
351   any_crashes_weekly_count_->Add(1);
352 }
353 
CheckSystemCrash(const string & crash_file)354 bool MetricsCollector::CheckSystemCrash(const string& crash_file) {
355   FilePath crash_detected(crash_file);
356   if (!base::PathExists(crash_detected))
357     return false;
358 
359   // Deletes the crash-detected file so that the daemon doesn't report
360   // another kernel crash in case it's restarted.
361   base::DeleteFile(crash_detected, false);  // not recursive
362   return true;
363 }
364 
StatsReporterInit()365 void MetricsCollector::StatsReporterInit() {
366   disk_usage_collector_->Schedule();
367 
368   cpu_usage_collector_->Init();
369   cpu_usage_collector_->Schedule();
370 
371   // Don't start a collection cycle during the first run to avoid delaying the
372   // boot.
373   averaged_stats_collector_->ScheduleWait();
374 }
375 
ScheduleMeminfoCallback(int wait)376 void MetricsCollector::ScheduleMeminfoCallback(int wait) {
377   if (testing_) {
378     return;
379   }
380   base::TimeDelta waitDelta = base::TimeDelta::FromSeconds(wait);
381   base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
382       base::Bind(&MetricsCollector::MeminfoCallback,
383                  weak_ptr_factory_.GetWeakPtr(), waitDelta),
384       waitDelta);
385 }
386 
MeminfoCallback(base::TimeDelta wait)387 void MetricsCollector::MeminfoCallback(base::TimeDelta wait) {
388   string meminfo_raw;
389   const FilePath meminfo_path(kMeminfoFileName);
390   if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) {
391     LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
392     return;
393   }
394   // Make both calls even if the first one fails.
395   if (ProcessMeminfo(meminfo_raw)) {
396     base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
397         base::Bind(&MetricsCollector::MeminfoCallback,
398                    weak_ptr_factory_.GetWeakPtr(), wait),
399         wait);
400   }
401 }
402 
403 // static
ReadFileToUint64(const base::FilePath & path,uint64_t * value)404 bool MetricsCollector::ReadFileToUint64(const base::FilePath& path,
405                                          uint64_t* value) {
406   std::string content;
407   if (!base::ReadFileToString(path, &content)) {
408     PLOG(WARNING) << "cannot read " << path.MaybeAsASCII();
409     return false;
410   }
411   // Remove final newline.
412   base::TrimWhitespaceASCII(content, base::TRIM_TRAILING, &content);
413   if (!base::StringToUint64(content, value)) {
414     LOG(WARNING) << "invalid integer: " << content;
415     return false;
416   }
417   return true;
418 }
419 
ReportZram(const base::FilePath & zram_dir)420 bool MetricsCollector::ReportZram(const base::FilePath& zram_dir) {
421   // Data sizes are in bytes.  |zero_pages| is in number of pages.
422   uint64_t compr_data_size, orig_data_size, zero_pages;
423   const size_t page_size = 4096;
424 
425   if (!ReadFileToUint64(zram_dir.Append(kComprDataSizeName),
426                         &compr_data_size) ||
427       !ReadFileToUint64(zram_dir.Append(kOrigDataSizeName), &orig_data_size) ||
428       !ReadFileToUint64(zram_dir.Append(kZeroPagesName), &zero_pages)) {
429     return false;
430   }
431 
432   // |orig_data_size| does not include zero-filled pages.
433   orig_data_size += zero_pages * page_size;
434 
435   const int compr_data_size_mb = compr_data_size >> 20;
436   const int savings_mb = (orig_data_size - compr_data_size) >> 20;
437   const int zero_ratio_percent = zero_pages * page_size * 100 / orig_data_size;
438 
439   // Report compressed size in megabytes.  100 MB or less has little impact.
440   SendSample("Platform.ZramCompressedSize", compr_data_size_mb, 100, 4000, 50);
441   SendSample("Platform.ZramSavings", savings_mb, 100, 4000, 50);
442   // The compression ratio is multiplied by 100 for better resolution.  The
443   // ratios of interest are between 1 and 6 (100% and 600% as reported).  We
444   // don't want samples when very little memory is being compressed.
445   if (compr_data_size_mb >= 1) {
446     SendSample("Platform.ZramCompressionRatioPercent",
447                orig_data_size * 100 / compr_data_size, 100, 600, 50);
448   }
449   // The values of interest for zero_pages are between 1MB and 1GB.  The units
450   // are number of pages.
451   SendSample("Platform.ZramZeroPages", zero_pages, 256, 256 * 1024, 50);
452   SendSample("Platform.ZramZeroRatioPercent", zero_ratio_percent, 1, 50, 50);
453 
454   return true;
455 }
456 
ProcessMeminfo(const string & meminfo_raw)457 bool MetricsCollector::ProcessMeminfo(const string& meminfo_raw) {
458   static const MeminfoRecord fields_array[] = {
459     { "MemTotal", "MemTotal" },  // SPECIAL CASE: total system memory
460     { "MemFree", "MemFree" },
461     { "Buffers", "Buffers" },
462     { "Cached", "Cached" },
463     // { "SwapCached", "SwapCached" },
464     { "Active", "Active" },
465     { "Inactive", "Inactive" },
466     { "ActiveAnon", "Active(anon)" },
467     { "InactiveAnon", "Inactive(anon)" },
468     { "ActiveFile" , "Active(file)" },
469     { "InactiveFile", "Inactive(file)" },
470     { "Unevictable", "Unevictable", kMeminfoOp_HistLog },
471     // { "Mlocked", "Mlocked" },
472     { "SwapTotal", "SwapTotal", kMeminfoOp_SwapTotal },
473     { "SwapFree", "SwapFree", kMeminfoOp_SwapFree },
474     // { "Dirty", "Dirty" },
475     // { "Writeback", "Writeback" },
476     { "AnonPages", "AnonPages" },
477     { "Mapped", "Mapped" },
478     { "Shmem", "Shmem", kMeminfoOp_HistLog },
479     { "Slab", "Slab", kMeminfoOp_HistLog },
480     // { "SReclaimable", "SReclaimable" },
481     // { "SUnreclaim", "SUnreclaim" },
482   };
483   vector<MeminfoRecord> fields(fields_array,
484                                fields_array + arraysize(fields_array));
485   if (!FillMeminfo(meminfo_raw, &fields)) {
486     return false;
487   }
488   int total_memory = fields[0].value;
489   if (total_memory == 0) {
490     // this "cannot happen"
491     LOG(WARNING) << "borked meminfo parser";
492     return false;
493   }
494   int swap_total = 0;
495   int swap_free = 0;
496   // Send all fields retrieved, except total memory.
497   for (unsigned int i = 1; i < fields.size(); i++) {
498     string metrics_name = base::StringPrintf("Platform.Meminfo%s",
499                                              fields[i].name);
500     int percent;
501     switch (fields[i].op) {
502       case kMeminfoOp_HistPercent:
503         // report value as percent of total memory
504         percent = fields[i].value * 100 / total_memory;
505         SendLinearSample(metrics_name, percent, 100, 101);
506         break;
507       case kMeminfoOp_HistLog:
508         // report value in kbytes, log scale, 4Gb max
509         SendSample(metrics_name, fields[i].value, 1, 4 * 1000 * 1000, 100);
510         break;
511       case kMeminfoOp_SwapTotal:
512         swap_total = fields[i].value;
513       case kMeminfoOp_SwapFree:
514         swap_free = fields[i].value;
515         break;
516     }
517   }
518   if (swap_total > 0) {
519     int swap_used = swap_total - swap_free;
520     int swap_used_percent = swap_used * 100 / swap_total;
521     SendSample("Platform.MeminfoSwapUsed", swap_used, 1, 8 * 1000 * 1000, 100);
522     SendLinearSample("Platform.MeminfoSwapUsed.Percent", swap_used_percent,
523                      100, 101);
524   }
525   return true;
526 }
527 
FillMeminfo(const string & meminfo_raw,vector<MeminfoRecord> * fields)528 bool MetricsCollector::FillMeminfo(const string& meminfo_raw,
529                                     vector<MeminfoRecord>* fields) {
530   vector<std::string> lines =
531       base::SplitString(meminfo_raw, "\n", base::KEEP_WHITESPACE,
532                         base::SPLIT_WANT_NONEMPTY);
533 
534   // Scan meminfo output and collect field values.  Each field name has to
535   // match a meminfo entry (case insensitive) after removing non-alpha
536   // characters from the entry.
537   size_t ifield = 0;
538   for (size_t iline = 0;
539        iline < lines.size() && ifield < fields->size();
540        iline++) {
541     vector<string> tokens =
542         base::SplitString(lines[iline], ": ", base::KEEP_WHITESPACE,
543                           base::SPLIT_WANT_NONEMPTY);
544     if (strcmp((*fields)[ifield].match, tokens[0].c_str()) == 0) {
545       // Name matches. Parse value and save.
546       if (!base::StringToInt(tokens[1], &(*fields)[ifield].value)) {
547         LOG(WARNING) << "Cound not convert " << tokens[1] << " to int";
548         return false;
549       }
550       ifield++;
551     }
552   }
553   if (ifield < fields->size()) {
554     // End of input reached while scanning.
555     LOG(WARNING) << "cannot find field " << (*fields)[ifield].match
556                  << " and following";
557     return false;
558   }
559   return true;
560 }
561 
ScheduleMemuseCallback(double interval)562 void MetricsCollector::ScheduleMemuseCallback(double interval) {
563   if (testing_) {
564     return;
565   }
566   base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
567       base::Bind(&MetricsCollector::MemuseCallback,
568                  weak_ptr_factory_.GetWeakPtr()),
569       base::TimeDelta::FromSeconds(interval));
570 }
571 
MemuseCallback()572 void MetricsCollector::MemuseCallback() {
573   // Since we only care about active time (i.e. uptime minus sleep time) but
574   // the callbacks are driven by real time (uptime), we check if we should
575   // reschedule this callback due to intervening sleep periods.
576   double now = GetActiveTime();
577   // Avoid intervals of less than one second.
578   double remaining_time = ceil(memuse_final_time_ - now);
579   if (remaining_time > 0) {
580     ScheduleMemuseCallback(remaining_time);
581   } else {
582     // Report stats and advance the measurement interval unless there are
583     // errors or we've completed the last interval.
584     if (MemuseCallbackWork() &&
585         memuse_interval_index_ < arraysize(kMemuseIntervals)) {
586       double interval = kMemuseIntervals[memuse_interval_index_++];
587       memuse_final_time_ = now + interval;
588       ScheduleMemuseCallback(interval);
589     }
590   }
591 }
592 
MemuseCallbackWork()593 bool MetricsCollector::MemuseCallbackWork() {
594   string meminfo_raw;
595   const FilePath meminfo_path(kMeminfoFileName);
596   if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) {
597     LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
598     return false;
599   }
600   return ProcessMemuse(meminfo_raw);
601 }
602 
ProcessMemuse(const string & meminfo_raw)603 bool MetricsCollector::ProcessMemuse(const string& meminfo_raw) {
604   static const MeminfoRecord fields_array[] = {
605     { "MemTotal", "MemTotal" },  // SPECIAL CASE: total system memory
606     { "ActiveAnon", "Active(anon)" },
607     { "InactiveAnon", "Inactive(anon)" },
608   };
609   vector<MeminfoRecord> fields(fields_array,
610                                fields_array + arraysize(fields_array));
611   if (!FillMeminfo(meminfo_raw, &fields)) {
612     return false;
613   }
614   int total = fields[0].value;
615   int active_anon = fields[1].value;
616   int inactive_anon = fields[2].value;
617   if (total == 0) {
618     // this "cannot happen"
619     LOG(WARNING) << "borked meminfo parser";
620     return false;
621   }
622   string metrics_name = base::StringPrintf("Platform.MemuseAnon%d",
623                                            memuse_interval_index_);
624   SendLinearSample(metrics_name, (active_anon + inactive_anon) * 100 / total,
625                    100, 101);
626   return true;
627 }
628 
SendSample(const string & name,int sample,int min,int max,int nbuckets)629 void MetricsCollector::SendSample(const string& name, int sample,
630                                    int min, int max, int nbuckets) {
631   metrics_lib_->SendToUMA(name, sample, min, max, nbuckets);
632 }
633 
SendKernelCrashesCumulativeCountStats()634 void MetricsCollector::SendKernelCrashesCumulativeCountStats() {
635   // Report the number of crashes for this OS version, but don't clear the
636   // counter.  It is cleared elsewhere on version change.
637   int64_t crashes_count = kernel_crashes_version_count_->Get();
638   SendSample(kernel_crashes_version_count_->Name(),
639              crashes_count,
640              1,                         // value of first bucket
641              500,                       // value of last bucket
642              100);                      // number of buckets
643 
644 
645   int64_t cpu_use_ms = version_cumulative_cpu_use_->Get();
646   SendSample(version_cumulative_cpu_use_->Name(),
647              cpu_use_ms / 1000,         // stat is in seconds
648              1,                         // device may be used very little...
649              8 * 1000 * 1000,           // ... or a lot (a little over 90 days)
650              100);
651 
652   // On the first run after an autoupdate, cpu_use_ms and active_use_seconds
653   // can be zero.  Avoid division by zero.
654   if (cpu_use_ms > 0) {
655     // Send the crash frequency since update in number of crashes per CPU year.
656     SendSample("Logging.KernelCrashesPerCpuYear",
657                crashes_count * kSecondsPerDay * 365 * 1000 / cpu_use_ms,
658                1,
659                1000 * 1000,     // about one crash every 30s of CPU time
660                100);
661   }
662 
663   int64_t active_use_seconds = version_cumulative_active_use_->Get();
664   if (active_use_seconds > 0) {
665     SendSample(version_cumulative_active_use_->Name(),
666                active_use_seconds,
667                1,                          // device may be used very little...
668                8 * 1000 * 1000,            // ... or a lot (about 90 days)
669                100);
670     // Same as above, but per year of active time.
671     SendSample("Logging.KernelCrashesPerActiveYear",
672                crashes_count * kSecondsPerDay * 365 / active_use_seconds,
673                1,
674                1000 * 1000,     // about one crash every 30s of active time
675                100);
676   }
677 }
678 
SendAndResetDailyUseSample(const unique_ptr<PersistentInteger> & use)679 void MetricsCollector::SendAndResetDailyUseSample(
680     const unique_ptr<PersistentInteger>& use) {
681   SendSample(use->Name(),
682              use->GetAndClear(),
683              1,                        // value of first bucket
684              kSecondsPerDay,           // value of last bucket
685              50);                      // number of buckets
686 }
687 
SendAndResetCrashIntervalSample(const unique_ptr<PersistentInteger> & interval)688 void MetricsCollector::SendAndResetCrashIntervalSample(
689     const unique_ptr<PersistentInteger>& interval) {
690   SendSample(interval->Name(),
691              interval->GetAndClear(),
692              1,                        // value of first bucket
693              4 * kSecondsPerWeek,      // value of last bucket
694              50);                      // number of buckets
695 }
696 
SendAndResetCrashFrequencySample(const unique_ptr<PersistentInteger> & frequency)697 void MetricsCollector::SendAndResetCrashFrequencySample(
698     const unique_ptr<PersistentInteger>& frequency) {
699   SendSample(frequency->Name(),
700              frequency->GetAndClear(),
701              1,                        // value of first bucket
702              100,                      // value of last bucket
703              50);                      // number of buckets
704 }
705 
SendLinearSample(const string & name,int sample,int max,int nbuckets)706 void MetricsCollector::SendLinearSample(const string& name, int sample,
707                                          int max, int nbuckets) {
708   // TODO(semenzato): add a proper linear histogram to the Chrome external
709   // metrics API.
710   LOG_IF(FATAL, nbuckets != max + 1) << "unsupported histogram scale";
711   metrics_lib_->SendEnumToUMA(name, sample, max);
712 }
713 
UpdateStats(TimeTicks now_ticks,Time now_wall_time)714 void MetricsCollector::UpdateStats(TimeTicks now_ticks,
715                                     Time now_wall_time) {
716   const int elapsed_seconds = (now_ticks - last_update_stats_time_).InSeconds();
717   daily_active_use_->Add(elapsed_seconds);
718   version_cumulative_active_use_->Add(elapsed_seconds);
719   user_crash_interval_->Add(elapsed_seconds);
720   kernel_crash_interval_->Add(elapsed_seconds);
721   TimeDelta cpu_use = cpu_usage_collector_->GetCumulativeCpuUse();
722   version_cumulative_cpu_use_->Add(
723       (cpu_use - latest_cpu_use_microseconds_).InMilliseconds());
724   latest_cpu_use_microseconds_ = cpu_use;
725   last_update_stats_time_ = now_ticks;
726 
727   const TimeDelta since_epoch = now_wall_time - Time::UnixEpoch();
728   const int day = since_epoch.InDays();
729   const int week = day / 7;
730 
731   if (daily_cycle_->Get() != day) {
732     daily_cycle_->Set(day);
733     SendAndResetDailyUseSample(daily_active_use_);
734     SendAndResetCrashFrequencySample(any_crashes_daily_count_);
735     SendAndResetCrashFrequencySample(user_crashes_daily_count_);
736     SendAndResetCrashFrequencySample(kernel_crashes_daily_count_);
737     SendAndResetCrashFrequencySample(unclean_shutdowns_daily_count_);
738     SendKernelCrashesCumulativeCountStats();
739   }
740 
741   if (weekly_cycle_->Get() != week) {
742     weekly_cycle_->Set(week);
743     SendAndResetCrashFrequencySample(any_crashes_weekly_count_);
744     SendAndResetCrashFrequencySample(user_crashes_weekly_count_);
745     SendAndResetCrashFrequencySample(kernel_crashes_weekly_count_);
746     SendAndResetCrashFrequencySample(unclean_shutdowns_weekly_count_);
747   }
748 }
749 
HandleUpdateStatsTimeout()750 void MetricsCollector::HandleUpdateStatsTimeout() {
751   UpdateStats(TimeTicks::Now(), Time::Now());
752   base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
753       base::Bind(&MetricsCollector::HandleUpdateStatsTimeout,
754                  weak_ptr_factory_.GetWeakPtr()),
755       base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs));
756 }
757