1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "metrics_collector.h"
18
19 #include <sysexits.h>
20 #include <time.h>
21
22 #include <memory>
23
24 #include <base/bind.h>
25 #include <base/files/file_path.h>
26 #include <base/files/file_util.h>
27 #include <base/hash.h>
28 #include <base/logging.h>
29 #include <base/strings/string_number_conversions.h>
30 #include <base/strings/string_split.h>
31 #include <base/strings/string_util.h>
32 #include <base/strings/stringprintf.h>
33 #include <brillo/binder_watcher.h>
34 #include <brillo/osrelease_reader.h>
35
36 #include "constants.h"
37 #include "metrics_collector_service_impl.h"
38
39 using base::FilePath;
40 using base::StringPrintf;
41 using base::Time;
42 using base::TimeDelta;
43 using base::TimeTicks;
44 using chromeos_metrics::PersistentInteger;
45 using std::map;
46 using std::string;
47 using std::vector;
48
namespace {

// Calendar arithmetic helpers. Each constant is the whole number of smaller
// units contained in the larger one.
constexpr int kSecondsPerMinute = 60;
constexpr int kMinutesPerHour = 60;
constexpr int kHoursPerDay = 24;
constexpr int kDaysPerWeek = 7;
constexpr int kMinutesPerDay = kHoursPerDay * kMinutesPerHour;
constexpr int kSecondsPerDay = kSecondsPerMinute * kMinutesPerDay;
constexpr int kSecondsPerWeek = kSecondsPerDay * kDaysPerWeek;

// Period, in milliseconds, of the recurring UpdateStats() call.
constexpr uint32_t kUpdateStatsIntervalMs = 300000;

// Marker files that Run() looks for (and removes) at startup to detect a
// kernel crash or an unclean shutdown.
constexpr char kKernelCrashDetectedFile[] =
    "/data/misc/crash_reporter/run/kernel-crash-detected";
constexpr char kUncleanShutdownDetectedFile[] =
    "/var/run/unclean-shutdown-detected";

// Delay, in seconds, between two meminfo collections.
constexpr int kMetricMeminfoInterval = 30;

// Kernel-provided statistics files read by the collectors.
constexpr char kMeminfoFileName[] = "/proc/meminfo";
constexpr char kVmStatFileName[] = "/proc/vmstat";

// Weave component and trait under which the metrics commands and state are
// registered.
constexpr char kWeaveComponent[] = "metrics";
constexpr char kWeaveTrait[] = "_metrics";

}  // namespace
76
77 // Zram sysfs entries.
78
79 const char MetricsCollector::kComprDataSizeName[] = "compr_data_size";
80 const char MetricsCollector::kOrigDataSizeName[] = "orig_data_size";
81 const char MetricsCollector::kZeroPagesName[] = "zero_pages";
82
83 // Memory use stats collection intervals. We collect some memory use interval
84 // at these intervals after boot, and we stop collecting after the last one,
85 // with the assumption that in most cases the memory use won't change much
86 // after that.
87 static const int kMemuseIntervals[] = {
88 1 * kSecondsPerMinute, // 1 minute mark
89 4 * kSecondsPerMinute, // 5 minute mark
90 25 * kSecondsPerMinute, // 0.5 hour mark
91 120 * kSecondsPerMinute, // 2.5 hour mark
92 600 * kSecondsPerMinute, // 12.5 hour mark
93 };
94
MetricsCollector()95 MetricsCollector::MetricsCollector()
96 : memuse_final_time_(0),
97 memuse_interval_index_(0) {}
98
~MetricsCollector()99 MetricsCollector::~MetricsCollector() {
100 }
101
102 // static
GetActiveTime()103 double MetricsCollector::GetActiveTime() {
104 struct timespec ts;
105 int r = clock_gettime(CLOCK_MONOTONIC, &ts);
106 if (r < 0) {
107 PLOG(WARNING) << "clock_gettime(CLOCK_MONOTONIC) failed";
108 return 0;
109 } else {
110 return ts.tv_sec + static_cast<double>(ts.tv_nsec) / (1000 * 1000 * 1000);
111 }
112 }
113
Run()114 int MetricsCollector::Run() {
115 if (CheckSystemCrash(kKernelCrashDetectedFile)) {
116 ProcessKernelCrash();
117 }
118
119 if (CheckSystemCrash(kUncleanShutdownDetectedFile)) {
120 ProcessUncleanShutdown();
121 }
122
123 // On OS version change, clear version stats (which are reported daily).
124 int32_t version = GetOsVersionHash();
125 if (version_cycle_->Get() != version) {
126 version_cycle_->Set(version);
127 kernel_crashes_version_count_->Set(0);
128 version_cumulative_active_use_->Set(0);
129 version_cumulative_cpu_use_->Set(0);
130 }
131
132 // Start metricscollectorservice
133 android::sp<BnMetricsCollectorServiceImpl> metrics_collector_service =
134 new BnMetricsCollectorServiceImpl(this);
135 android::status_t status = android::defaultServiceManager()->addService(
136 metrics_collector_service->getInterfaceDescriptor(),
137 metrics_collector_service);
138 CHECK(status == android::OK)
139 << "failed to register service metricscollectorservice";
140
141 // Watch Binder events in the main loop
142 brillo::BinderWatcher binder_watcher;
143 CHECK(binder_watcher.Init()) << "Binder FD watcher init failed";
144 return brillo::Daemon::Run();
145 }
146
GetOsVersionHash()147 uint32_t MetricsCollector::GetOsVersionHash() {
148 brillo::OsReleaseReader reader;
149 reader.Load();
150 string version;
151 if (!reader.GetString(metrics::kProductVersion, &version)) {
152 LOG(ERROR) << "failed to read the product version.";
153 version = metrics::kDefaultVersion;
154 }
155
156 uint32_t version_hash = base::Hash(version);
157 if (testing_) {
158 version_hash = 42; // return any plausible value for the hash
159 }
160 return version_hash;
161 }
162
Init(bool testing,MetricsLibraryInterface * metrics_lib,const string & diskstats_path,const base::FilePath & private_metrics_directory,const base::FilePath & shared_metrics_directory)163 void MetricsCollector::Init(bool testing, MetricsLibraryInterface* metrics_lib,
164 const string& diskstats_path,
165 const base::FilePath& private_metrics_directory,
166 const base::FilePath& shared_metrics_directory) {
167 CHECK(metrics_lib);
168 testing_ = testing;
169 shared_metrics_directory_ = shared_metrics_directory;
170 metrics_lib_ = metrics_lib;
171
172 daily_active_use_.reset(new PersistentInteger("Platform.UseTime.PerDay",
173 private_metrics_directory));
174 version_cumulative_active_use_.reset(new PersistentInteger(
175 "Platform.CumulativeUseTime", private_metrics_directory));
176 version_cumulative_cpu_use_.reset(new PersistentInteger(
177 "Platform.CumulativeCpuTime", private_metrics_directory));
178
179 kernel_crash_interval_.reset(new PersistentInteger(
180 "Platform.KernelCrashInterval", private_metrics_directory));
181 unclean_shutdown_interval_.reset(new PersistentInteger(
182 "Platform.UncleanShutdownInterval", private_metrics_directory));
183 user_crash_interval_.reset(new PersistentInteger("Platform.UserCrashInterval",
184 private_metrics_directory));
185
186 any_crashes_daily_count_.reset(new PersistentInteger(
187 "Platform.AnyCrashes.PerDay", private_metrics_directory));
188 any_crashes_weekly_count_.reset(new PersistentInteger(
189 "Platform.AnyCrashes.PerWeek", private_metrics_directory));
190 user_crashes_daily_count_.reset(new PersistentInteger(
191 "Platform.UserCrashes.PerDay", private_metrics_directory));
192 user_crashes_weekly_count_.reset(new PersistentInteger(
193 "Platform.UserCrashes.PerWeek", private_metrics_directory));
194 kernel_crashes_daily_count_.reset(new PersistentInteger(
195 "Platform.KernelCrashes.PerDay", private_metrics_directory));
196 kernel_crashes_weekly_count_.reset(new PersistentInteger(
197 "Platform.KernelCrashes.PerWeek", private_metrics_directory));
198 kernel_crashes_version_count_.reset(new PersistentInteger(
199 "Platform.KernelCrashesSinceUpdate", private_metrics_directory));
200 unclean_shutdowns_daily_count_.reset(new PersistentInteger(
201 "Platform.UncleanShutdown.PerDay", private_metrics_directory));
202 unclean_shutdowns_weekly_count_.reset(new PersistentInteger(
203 "Platform.UncleanShutdowns.PerWeek", private_metrics_directory));
204
205 daily_cycle_.reset(
206 new PersistentInteger("daily.cycle", private_metrics_directory));
207 weekly_cycle_.reset(
208 new PersistentInteger("weekly.cycle", private_metrics_directory));
209 version_cycle_.reset(
210 new PersistentInteger("version.cycle", private_metrics_directory));
211
212 disk_usage_collector_.reset(new DiskUsageCollector(metrics_lib_));
213 averaged_stats_collector_.reset(
214 new AveragedStatisticsCollector(metrics_lib_, diskstats_path,
215 kVmStatFileName));
216 cpu_usage_collector_.reset(new CpuUsageCollector(metrics_lib_));
217 }
218
OnInit()219 int MetricsCollector::OnInit() {
220 int return_code = brillo::Daemon::OnInit();
221 if (return_code != EX_OK)
222 return return_code;
223
224 StatsReporterInit();
225
226 // Start collecting meminfo stats.
227 ScheduleMeminfoCallback(kMetricMeminfoInterval);
228 memuse_final_time_ = GetActiveTime() + kMemuseIntervals[0];
229 ScheduleMemuseCallback(kMemuseIntervals[0]);
230
231 if (testing_)
232 return EX_OK;
233
234 weave_service_subscription_ = weaved::Service::Connect(
235 brillo::MessageLoop::current(),
236 base::Bind(&MetricsCollector::OnWeaveServiceConnected,
237 weak_ptr_factory_.GetWeakPtr()));
238
239 latest_cpu_use_microseconds_ = cpu_usage_collector_->GetCumulativeCpuUse();
240 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
241 base::Bind(&MetricsCollector::HandleUpdateStatsTimeout,
242 weak_ptr_factory_.GetWeakPtr()),
243 base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs));
244
245 return EX_OK;
246 }
247
OnWeaveServiceConnected(const std::weak_ptr<weaved::Service> & service)248 void MetricsCollector::OnWeaveServiceConnected(
249 const std::weak_ptr<weaved::Service>& service) {
250 service_ = service;
251 auto weave_service = service_.lock();
252 if (!weave_service)
253 return;
254
255 weave_service->AddComponent(kWeaveComponent, {kWeaveTrait}, nullptr);
256 weave_service->AddCommandHandler(
257 kWeaveComponent, kWeaveTrait, "enableAnalyticsReporting",
258 base::Bind(&MetricsCollector::OnEnableMetrics,
259 weak_ptr_factory_.GetWeakPtr()));
260 weave_service->AddCommandHandler(
261 kWeaveComponent, kWeaveTrait, "disableAnalyticsReporting",
262 base::Bind(&MetricsCollector::OnDisableMetrics,
263 weak_ptr_factory_.GetWeakPtr()));
264
265 UpdateWeaveState();
266 }
267
OnEnableMetrics(std::unique_ptr<weaved::Command> command)268 void MetricsCollector::OnEnableMetrics(
269 std::unique_ptr<weaved::Command> command) {
270 if (base::WriteFile(
271 shared_metrics_directory_.Append(metrics::kConsentFileName), "", 0) !=
272 0) {
273 PLOG(ERROR) << "Could not create the consent file.";
274 command->Abort("metrics_error", "Could not create the consent file",
275 nullptr);
276 return;
277 }
278
279 UpdateWeaveState();
280 command->Complete({}, nullptr);
281 }
282
OnDisableMetrics(std::unique_ptr<weaved::Command> command)283 void MetricsCollector::OnDisableMetrics(
284 std::unique_ptr<weaved::Command> command) {
285 if (!base::DeleteFile(
286 shared_metrics_directory_.Append(metrics::kConsentFileName), false)) {
287 PLOG(ERROR) << "Could not delete the consent file.";
288 command->Abort("metrics_error", "Could not delete the consent file",
289 nullptr);
290 return;
291 }
292
293 UpdateWeaveState();
294 command->Complete({}, nullptr);
295 }
296
UpdateWeaveState()297 void MetricsCollector::UpdateWeaveState() {
298 auto weave_service = service_.lock();
299 if (!weave_service)
300 return;
301
302 std::string enabled =
303 metrics_lib_->AreMetricsEnabled() ? "enabled" : "disabled";
304
305 if (!weave_service->SetStateProperty(kWeaveComponent, kWeaveTrait,
306 "analyticsReportingState",
307 *brillo::ToValue(enabled),
308 nullptr)) {
309 LOG(ERROR) << "failed to update weave's state";
310 }
311 }
312
ProcessUserCrash()313 void MetricsCollector::ProcessUserCrash() {
314 // Counts the active time up to now.
315 UpdateStats(TimeTicks::Now(), Time::Now());
316
317 // Reports the active use time since the last crash and resets it.
318 SendAndResetCrashIntervalSample(user_crash_interval_);
319
320 any_crashes_daily_count_->Add(1);
321 any_crashes_weekly_count_->Add(1);
322 user_crashes_daily_count_->Add(1);
323 user_crashes_weekly_count_->Add(1);
324 }
325
ProcessKernelCrash()326 void MetricsCollector::ProcessKernelCrash() {
327 // Counts the active time up to now.
328 UpdateStats(TimeTicks::Now(), Time::Now());
329
330 // Reports the active use time since the last crash and resets it.
331 SendAndResetCrashIntervalSample(kernel_crash_interval_);
332
333 any_crashes_daily_count_->Add(1);
334 any_crashes_weekly_count_->Add(1);
335 kernel_crashes_daily_count_->Add(1);
336 kernel_crashes_weekly_count_->Add(1);
337
338 kernel_crashes_version_count_->Add(1);
339 }
340
ProcessUncleanShutdown()341 void MetricsCollector::ProcessUncleanShutdown() {
342 // Counts the active time up to now.
343 UpdateStats(TimeTicks::Now(), Time::Now());
344
345 // Reports the active use time since the last crash and resets it.
346 SendAndResetCrashIntervalSample(unclean_shutdown_interval_);
347
348 unclean_shutdowns_daily_count_->Add(1);
349 unclean_shutdowns_weekly_count_->Add(1);
350 any_crashes_daily_count_->Add(1);
351 any_crashes_weekly_count_->Add(1);
352 }
353
CheckSystemCrash(const string & crash_file)354 bool MetricsCollector::CheckSystemCrash(const string& crash_file) {
355 FilePath crash_detected(crash_file);
356 if (!base::PathExists(crash_detected))
357 return false;
358
359 // Deletes the crash-detected file so that the daemon doesn't report
360 // another kernel crash in case it's restarted.
361 base::DeleteFile(crash_detected, false); // not recursive
362 return true;
363 }
364
StatsReporterInit()365 void MetricsCollector::StatsReporterInit() {
366 disk_usage_collector_->Schedule();
367
368 cpu_usage_collector_->Init();
369 cpu_usage_collector_->Schedule();
370
371 // Don't start a collection cycle during the first run to avoid delaying the
372 // boot.
373 averaged_stats_collector_->ScheduleWait();
374 }
375
ScheduleMeminfoCallback(int wait)376 void MetricsCollector::ScheduleMeminfoCallback(int wait) {
377 if (testing_) {
378 return;
379 }
380 base::TimeDelta waitDelta = base::TimeDelta::FromSeconds(wait);
381 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
382 base::Bind(&MetricsCollector::MeminfoCallback,
383 weak_ptr_factory_.GetWeakPtr(), waitDelta),
384 waitDelta);
385 }
386
MeminfoCallback(base::TimeDelta wait)387 void MetricsCollector::MeminfoCallback(base::TimeDelta wait) {
388 string meminfo_raw;
389 const FilePath meminfo_path(kMeminfoFileName);
390 if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) {
391 LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
392 return;
393 }
394 // Make both calls even if the first one fails.
395 if (ProcessMeminfo(meminfo_raw)) {
396 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
397 base::Bind(&MetricsCollector::MeminfoCallback,
398 weak_ptr_factory_.GetWeakPtr(), wait),
399 wait);
400 }
401 }
402
403 // static
ReadFileToUint64(const base::FilePath & path,uint64_t * value)404 bool MetricsCollector::ReadFileToUint64(const base::FilePath& path,
405 uint64_t* value) {
406 std::string content;
407 if (!base::ReadFileToString(path, &content)) {
408 PLOG(WARNING) << "cannot read " << path.MaybeAsASCII();
409 return false;
410 }
411 // Remove final newline.
412 base::TrimWhitespaceASCII(content, base::TRIM_TRAILING, &content);
413 if (!base::StringToUint64(content, value)) {
414 LOG(WARNING) << "invalid integer: " << content;
415 return false;
416 }
417 return true;
418 }
419
ReportZram(const base::FilePath & zram_dir)420 bool MetricsCollector::ReportZram(const base::FilePath& zram_dir) {
421 // Data sizes are in bytes. |zero_pages| is in number of pages.
422 uint64_t compr_data_size, orig_data_size, zero_pages;
423 const size_t page_size = 4096;
424
425 if (!ReadFileToUint64(zram_dir.Append(kComprDataSizeName),
426 &compr_data_size) ||
427 !ReadFileToUint64(zram_dir.Append(kOrigDataSizeName), &orig_data_size) ||
428 !ReadFileToUint64(zram_dir.Append(kZeroPagesName), &zero_pages)) {
429 return false;
430 }
431
432 // |orig_data_size| does not include zero-filled pages.
433 orig_data_size += zero_pages * page_size;
434
435 const int compr_data_size_mb = compr_data_size >> 20;
436 const int savings_mb = (orig_data_size - compr_data_size) >> 20;
437 const int zero_ratio_percent = zero_pages * page_size * 100 / orig_data_size;
438
439 // Report compressed size in megabytes. 100 MB or less has little impact.
440 SendSample("Platform.ZramCompressedSize", compr_data_size_mb, 100, 4000, 50);
441 SendSample("Platform.ZramSavings", savings_mb, 100, 4000, 50);
442 // The compression ratio is multiplied by 100 for better resolution. The
443 // ratios of interest are between 1 and 6 (100% and 600% as reported). We
444 // don't want samples when very little memory is being compressed.
445 if (compr_data_size_mb >= 1) {
446 SendSample("Platform.ZramCompressionRatioPercent",
447 orig_data_size * 100 / compr_data_size, 100, 600, 50);
448 }
449 // The values of interest for zero_pages are between 1MB and 1GB. The units
450 // are number of pages.
451 SendSample("Platform.ZramZeroPages", zero_pages, 256, 256 * 1024, 50);
452 SendSample("Platform.ZramZeroRatioPercent", zero_ratio_percent, 1, 50, 50);
453
454 return true;
455 }
456
ProcessMeminfo(const string & meminfo_raw)457 bool MetricsCollector::ProcessMeminfo(const string& meminfo_raw) {
458 static const MeminfoRecord fields_array[] = {
459 { "MemTotal", "MemTotal" }, // SPECIAL CASE: total system memory
460 { "MemFree", "MemFree" },
461 { "Buffers", "Buffers" },
462 { "Cached", "Cached" },
463 // { "SwapCached", "SwapCached" },
464 { "Active", "Active" },
465 { "Inactive", "Inactive" },
466 { "ActiveAnon", "Active(anon)" },
467 { "InactiveAnon", "Inactive(anon)" },
468 { "ActiveFile" , "Active(file)" },
469 { "InactiveFile", "Inactive(file)" },
470 { "Unevictable", "Unevictable", kMeminfoOp_HistLog },
471 // { "Mlocked", "Mlocked" },
472 { "SwapTotal", "SwapTotal", kMeminfoOp_SwapTotal },
473 { "SwapFree", "SwapFree", kMeminfoOp_SwapFree },
474 // { "Dirty", "Dirty" },
475 // { "Writeback", "Writeback" },
476 { "AnonPages", "AnonPages" },
477 { "Mapped", "Mapped" },
478 { "Shmem", "Shmem", kMeminfoOp_HistLog },
479 { "Slab", "Slab", kMeminfoOp_HistLog },
480 // { "SReclaimable", "SReclaimable" },
481 // { "SUnreclaim", "SUnreclaim" },
482 };
483 vector<MeminfoRecord> fields(fields_array,
484 fields_array + arraysize(fields_array));
485 if (!FillMeminfo(meminfo_raw, &fields)) {
486 return false;
487 }
488 int total_memory = fields[0].value;
489 if (total_memory == 0) {
490 // this "cannot happen"
491 LOG(WARNING) << "borked meminfo parser";
492 return false;
493 }
494 int swap_total = 0;
495 int swap_free = 0;
496 // Send all fields retrieved, except total memory.
497 for (unsigned int i = 1; i < fields.size(); i++) {
498 string metrics_name = base::StringPrintf("Platform.Meminfo%s",
499 fields[i].name);
500 int percent;
501 switch (fields[i].op) {
502 case kMeminfoOp_HistPercent:
503 // report value as percent of total memory
504 percent = fields[i].value * 100 / total_memory;
505 SendLinearSample(metrics_name, percent, 100, 101);
506 break;
507 case kMeminfoOp_HistLog:
508 // report value in kbytes, log scale, 4Gb max
509 SendSample(metrics_name, fields[i].value, 1, 4 * 1000 * 1000, 100);
510 break;
511 case kMeminfoOp_SwapTotal:
512 swap_total = fields[i].value;
513 case kMeminfoOp_SwapFree:
514 swap_free = fields[i].value;
515 break;
516 }
517 }
518 if (swap_total > 0) {
519 int swap_used = swap_total - swap_free;
520 int swap_used_percent = swap_used * 100 / swap_total;
521 SendSample("Platform.MeminfoSwapUsed", swap_used, 1, 8 * 1000 * 1000, 100);
522 SendLinearSample("Platform.MeminfoSwapUsed.Percent", swap_used_percent,
523 100, 101);
524 }
525 return true;
526 }
527
FillMeminfo(const string & meminfo_raw,vector<MeminfoRecord> * fields)528 bool MetricsCollector::FillMeminfo(const string& meminfo_raw,
529 vector<MeminfoRecord>* fields) {
530 vector<std::string> lines =
531 base::SplitString(meminfo_raw, "\n", base::KEEP_WHITESPACE,
532 base::SPLIT_WANT_NONEMPTY);
533
534 // Scan meminfo output and collect field values. Each field name has to
535 // match a meminfo entry (case insensitive) after removing non-alpha
536 // characters from the entry.
537 size_t ifield = 0;
538 for (size_t iline = 0;
539 iline < lines.size() && ifield < fields->size();
540 iline++) {
541 vector<string> tokens =
542 base::SplitString(lines[iline], ": ", base::KEEP_WHITESPACE,
543 base::SPLIT_WANT_NONEMPTY);
544 if (strcmp((*fields)[ifield].match, tokens[0].c_str()) == 0) {
545 // Name matches. Parse value and save.
546 if (!base::StringToInt(tokens[1], &(*fields)[ifield].value)) {
547 LOG(WARNING) << "Cound not convert " << tokens[1] << " to int";
548 return false;
549 }
550 ifield++;
551 }
552 }
553 if (ifield < fields->size()) {
554 // End of input reached while scanning.
555 LOG(WARNING) << "cannot find field " << (*fields)[ifield].match
556 << " and following";
557 return false;
558 }
559 return true;
560 }
561
ScheduleMemuseCallback(double interval)562 void MetricsCollector::ScheduleMemuseCallback(double interval) {
563 if (testing_) {
564 return;
565 }
566 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
567 base::Bind(&MetricsCollector::MemuseCallback,
568 weak_ptr_factory_.GetWeakPtr()),
569 base::TimeDelta::FromSeconds(interval));
570 }
571
MemuseCallback()572 void MetricsCollector::MemuseCallback() {
573 // Since we only care about active time (i.e. uptime minus sleep time) but
574 // the callbacks are driven by real time (uptime), we check if we should
575 // reschedule this callback due to intervening sleep periods.
576 double now = GetActiveTime();
577 // Avoid intervals of less than one second.
578 double remaining_time = ceil(memuse_final_time_ - now);
579 if (remaining_time > 0) {
580 ScheduleMemuseCallback(remaining_time);
581 } else {
582 // Report stats and advance the measurement interval unless there are
583 // errors or we've completed the last interval.
584 if (MemuseCallbackWork() &&
585 memuse_interval_index_ < arraysize(kMemuseIntervals)) {
586 double interval = kMemuseIntervals[memuse_interval_index_++];
587 memuse_final_time_ = now + interval;
588 ScheduleMemuseCallback(interval);
589 }
590 }
591 }
592
MemuseCallbackWork()593 bool MetricsCollector::MemuseCallbackWork() {
594 string meminfo_raw;
595 const FilePath meminfo_path(kMeminfoFileName);
596 if (!base::ReadFileToString(meminfo_path, &meminfo_raw)) {
597 LOG(WARNING) << "cannot read " << meminfo_path.value().c_str();
598 return false;
599 }
600 return ProcessMemuse(meminfo_raw);
601 }
602
ProcessMemuse(const string & meminfo_raw)603 bool MetricsCollector::ProcessMemuse(const string& meminfo_raw) {
604 static const MeminfoRecord fields_array[] = {
605 { "MemTotal", "MemTotal" }, // SPECIAL CASE: total system memory
606 { "ActiveAnon", "Active(anon)" },
607 { "InactiveAnon", "Inactive(anon)" },
608 };
609 vector<MeminfoRecord> fields(fields_array,
610 fields_array + arraysize(fields_array));
611 if (!FillMeminfo(meminfo_raw, &fields)) {
612 return false;
613 }
614 int total = fields[0].value;
615 int active_anon = fields[1].value;
616 int inactive_anon = fields[2].value;
617 if (total == 0) {
618 // this "cannot happen"
619 LOG(WARNING) << "borked meminfo parser";
620 return false;
621 }
622 string metrics_name = base::StringPrintf("Platform.MemuseAnon%d",
623 memuse_interval_index_);
624 SendLinearSample(metrics_name, (active_anon + inactive_anon) * 100 / total,
625 100, 101);
626 return true;
627 }
628
SendSample(const string & name,int sample,int min,int max,int nbuckets)629 void MetricsCollector::SendSample(const string& name, int sample,
630 int min, int max, int nbuckets) {
631 metrics_lib_->SendToUMA(name, sample, min, max, nbuckets);
632 }
633
SendKernelCrashesCumulativeCountStats()634 void MetricsCollector::SendKernelCrashesCumulativeCountStats() {
635 // Report the number of crashes for this OS version, but don't clear the
636 // counter. It is cleared elsewhere on version change.
637 int64_t crashes_count = kernel_crashes_version_count_->Get();
638 SendSample(kernel_crashes_version_count_->Name(),
639 crashes_count,
640 1, // value of first bucket
641 500, // value of last bucket
642 100); // number of buckets
643
644
645 int64_t cpu_use_ms = version_cumulative_cpu_use_->Get();
646 SendSample(version_cumulative_cpu_use_->Name(),
647 cpu_use_ms / 1000, // stat is in seconds
648 1, // device may be used very little...
649 8 * 1000 * 1000, // ... or a lot (a little over 90 days)
650 100);
651
652 // On the first run after an autoupdate, cpu_use_ms and active_use_seconds
653 // can be zero. Avoid division by zero.
654 if (cpu_use_ms > 0) {
655 // Send the crash frequency since update in number of crashes per CPU year.
656 SendSample("Logging.KernelCrashesPerCpuYear",
657 crashes_count * kSecondsPerDay * 365 * 1000 / cpu_use_ms,
658 1,
659 1000 * 1000, // about one crash every 30s of CPU time
660 100);
661 }
662
663 int64_t active_use_seconds = version_cumulative_active_use_->Get();
664 if (active_use_seconds > 0) {
665 SendSample(version_cumulative_active_use_->Name(),
666 active_use_seconds,
667 1, // device may be used very little...
668 8 * 1000 * 1000, // ... or a lot (about 90 days)
669 100);
670 // Same as above, but per year of active time.
671 SendSample("Logging.KernelCrashesPerActiveYear",
672 crashes_count * kSecondsPerDay * 365 / active_use_seconds,
673 1,
674 1000 * 1000, // about one crash every 30s of active time
675 100);
676 }
677 }
678
SendAndResetDailyUseSample(const unique_ptr<PersistentInteger> & use)679 void MetricsCollector::SendAndResetDailyUseSample(
680 const unique_ptr<PersistentInteger>& use) {
681 SendSample(use->Name(),
682 use->GetAndClear(),
683 1, // value of first bucket
684 kSecondsPerDay, // value of last bucket
685 50); // number of buckets
686 }
687
SendAndResetCrashIntervalSample(const unique_ptr<PersistentInteger> & interval)688 void MetricsCollector::SendAndResetCrashIntervalSample(
689 const unique_ptr<PersistentInteger>& interval) {
690 SendSample(interval->Name(),
691 interval->GetAndClear(),
692 1, // value of first bucket
693 4 * kSecondsPerWeek, // value of last bucket
694 50); // number of buckets
695 }
696
SendAndResetCrashFrequencySample(const unique_ptr<PersistentInteger> & frequency)697 void MetricsCollector::SendAndResetCrashFrequencySample(
698 const unique_ptr<PersistentInteger>& frequency) {
699 SendSample(frequency->Name(),
700 frequency->GetAndClear(),
701 1, // value of first bucket
702 100, // value of last bucket
703 50); // number of buckets
704 }
705
SendLinearSample(const string & name,int sample,int max,int nbuckets)706 void MetricsCollector::SendLinearSample(const string& name, int sample,
707 int max, int nbuckets) {
708 // TODO(semenzato): add a proper linear histogram to the Chrome external
709 // metrics API.
710 LOG_IF(FATAL, nbuckets != max + 1) << "unsupported histogram scale";
711 metrics_lib_->SendEnumToUMA(name, sample, max);
712 }
713
UpdateStats(TimeTicks now_ticks,Time now_wall_time)714 void MetricsCollector::UpdateStats(TimeTicks now_ticks,
715 Time now_wall_time) {
716 const int elapsed_seconds = (now_ticks - last_update_stats_time_).InSeconds();
717 daily_active_use_->Add(elapsed_seconds);
718 version_cumulative_active_use_->Add(elapsed_seconds);
719 user_crash_interval_->Add(elapsed_seconds);
720 kernel_crash_interval_->Add(elapsed_seconds);
721 TimeDelta cpu_use = cpu_usage_collector_->GetCumulativeCpuUse();
722 version_cumulative_cpu_use_->Add(
723 (cpu_use - latest_cpu_use_microseconds_).InMilliseconds());
724 latest_cpu_use_microseconds_ = cpu_use;
725 last_update_stats_time_ = now_ticks;
726
727 const TimeDelta since_epoch = now_wall_time - Time::UnixEpoch();
728 const int day = since_epoch.InDays();
729 const int week = day / 7;
730
731 if (daily_cycle_->Get() != day) {
732 daily_cycle_->Set(day);
733 SendAndResetDailyUseSample(daily_active_use_);
734 SendAndResetCrashFrequencySample(any_crashes_daily_count_);
735 SendAndResetCrashFrequencySample(user_crashes_daily_count_);
736 SendAndResetCrashFrequencySample(kernel_crashes_daily_count_);
737 SendAndResetCrashFrequencySample(unclean_shutdowns_daily_count_);
738 SendKernelCrashesCumulativeCountStats();
739 }
740
741 if (weekly_cycle_->Get() != week) {
742 weekly_cycle_->Set(week);
743 SendAndResetCrashFrequencySample(any_crashes_weekly_count_);
744 SendAndResetCrashFrequencySample(user_crashes_weekly_count_);
745 SendAndResetCrashFrequencySample(kernel_crashes_weekly_count_);
746 SendAndResetCrashFrequencySample(unclean_shutdowns_weekly_count_);
747 }
748 }
749
HandleUpdateStatsTimeout()750 void MetricsCollector::HandleUpdateStatsTimeout() {
751 UpdateStats(TimeTicks::Now(), Time::Now());
752 base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
753 base::Bind(&MetricsCollector::HandleUpdateStatsTimeout,
754 weak_ptr_factory_.GetWeakPtr()),
755 base::TimeDelta::FromMilliseconds(kUpdateStatsIntervalMs));
756 }
757