1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef METRICS_METRICS_COLLECTOR_H_ 18 #define METRICS_METRICS_COLLECTOR_H_ 19 20 #include <stdint.h> 21 22 #include <map> 23 #include <memory> 24 #include <string> 25 #include <vector> 26 27 #include <base/files/file_path.h> 28 #include <base/memory/weak_ptr.h> 29 #include <base/time/time.h> 30 #include <brillo/binder_watcher.h> 31 #include <brillo/daemons/daemon.h> 32 #include <libweaved/command.h> 33 #include <libweaved/service.h> 34 #include <gtest/gtest_prod.h> // for FRIEND_TEST 35 36 #include "collectors/averaged_statistics_collector.h" 37 #include "collectors/cpu_usage_collector.h" 38 #include "collectors/disk_usage_collector.h" 39 #include "metrics/metrics_library.h" 40 #include "persistent_integer.h" 41 42 using chromeos_metrics::PersistentInteger; 43 using std::unique_ptr; 44 45 class MetricsCollector : public brillo::Daemon { 46 public: 47 MetricsCollector(); 48 ~MetricsCollector(); 49 50 // Initializes metrics class variables. 51 void Init(bool testing, 52 MetricsLibraryInterface* metrics_lib, 53 const std::string& diskstats_path, 54 const base::FilePath& private_metrics_directory, 55 const base::FilePath& shared_metrics_directory); 56 57 // Initializes the daemon. 58 int OnInit() override; 59 60 // Does all the work. 61 int Run() override; 62 63 // Returns the active time since boot (uptime minus sleep time) in seconds. 64 static double GetActiveTime(); 65 66 // Updates the active use time and logs time between user-space 67 // process crashes. Called via MetricsCollectorServiceTrampoline. 68 void ProcessUserCrash(); 69 70 protected: 71 // Used also by the unit tests. 72 static const char kComprDataSizeName[]; 73 static const char kOrigDataSizeName[]; 74 static const char kZeroPagesName[]; 75 76 private: 77 friend class MetricsCollectorTest; 78 FRIEND_TEST(MetricsCollectorTest, CheckSystemCrash); 79 FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoCurrent); 80 FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoLast); 81 FRIEND_TEST(MetricsCollectorTest, GetHistogramPath); 82 FRIEND_TEST(MetricsCollectorTest, IsNewEpoch); 83 FRIEND_TEST(MetricsCollectorTest, MessageFilter); 84 FRIEND_TEST(MetricsCollectorTest, ProcessKernelCrash); 85 FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo); 86 FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo2); 87 FRIEND_TEST(MetricsCollectorTest, ProcessUncleanShutdown); 88 FRIEND_TEST(MetricsCollectorTest, ProcessUserCrash); 89 FRIEND_TEST(MetricsCollectorTest, ReportCrashesDailyFrequency); 90 FRIEND_TEST(MetricsCollectorTest, ReportKernelCrashInterval); 91 FRIEND_TEST(MetricsCollectorTest, ReportUncleanShutdownInterval); 92 FRIEND_TEST(MetricsCollectorTest, ReportUserCrashInterval); 93 FRIEND_TEST(MetricsCollectorTest, SendSample); 94 FRIEND_TEST(MetricsCollectorTest, SendZramMetrics); 95 96 // Type of scale to use for meminfo histograms. For most of them we use 97 // percent of total RAM, but for some we use absolute numbers, usually in 98 // megabytes, on a log scale from 0 to 4000, and 0 to 8000 for compressed 99 // swap (since it can be larger than total RAM). 100 enum MeminfoOp { 101 kMeminfoOp_HistPercent = 0, 102 kMeminfoOp_HistLog, 103 kMeminfoOp_SwapTotal, 104 kMeminfoOp_SwapFree, 105 }; 106 107 // Record for retrieving and reporting values from /proc/meminfo. 108 struct MeminfoRecord { 109 const char* name; // print name 110 const char* match; // string to match in output of /proc/meminfo 111 MeminfoOp op; // histogram scale selector, or other operator 112 int value; // value from /proc/meminfo 113 }; 114 115 // Enables metrics reporting. 116 void OnEnableMetrics(std::unique_ptr<weaved::Command> command); 117 118 // Disables metrics reporting. 119 void OnDisableMetrics(std::unique_ptr<weaved::Command> command); 120 121 // Updates the weave device state. 122 void UpdateWeaveState(); 123 124 // Updates the active use time and logs time between kernel crashes. 125 void ProcessKernelCrash(); 126 127 // Updates the active use time and logs time between unclean shutdowns. 128 void ProcessUncleanShutdown(); 129 130 // Checks if a kernel crash has been detected and returns true if 131 // so. The method assumes that a kernel crash has happened if 132 // |crash_file| exists. It removes the file immediately if it 133 // exists, so it must not be called more than once. 134 bool CheckSystemCrash(const std::string& crash_file); 135 136 // Sends a regular (exponential) histogram sample to Chrome for 137 // transport to UMA. See MetricsLibrary::SendToUMA in 138 // metrics_library.h for a description of the arguments. 139 void SendSample(const std::string& name, int sample, 140 int min, int max, int nbuckets); 141 142 // Sends a linear histogram sample to Chrome for transport to UMA. See 143 // MetricsLibrary::SendToUMA in metrics_library.h for a description of the 144 // arguments. 145 void SendLinearSample(const std::string& name, int sample, 146 int max, int nbuckets); 147 148 // Sends various cumulative kernel crash-related stats, for instance the 149 // total number of kernel crashes since the last version update. 150 void SendKernelCrashesCumulativeCountStats(); 151 152 // Sends a sample representing the number of seconds of active use 153 // for a 24-hour period and reset |use|. 154 void SendAndResetDailyUseSample(const unique_ptr<PersistentInteger>& use); 155 156 // Sends a sample representing a time interval between two crashes of the 157 // same type and reset |interval|. 158 void SendAndResetCrashIntervalSample( 159 const unique_ptr<PersistentInteger>& interval); 160 161 // Sends a sample representing a frequency of crashes of some type and reset 162 // |frequency|. 163 void SendAndResetCrashFrequencySample( 164 const unique_ptr<PersistentInteger>& frequency); 165 166 // Initializes vm and disk stats reporting. 167 void StatsReporterInit(); 168 169 // Schedules meminfo collection callback. 170 void ScheduleMeminfoCallback(int wait); 171 172 // Reports memory statistics. Reschedules callback on success. 173 void MeminfoCallback(base::TimeDelta wait); 174 175 // Parses content of /proc/meminfo and sends fields of interest to UMA. 176 // Returns false on errors. |meminfo_raw| contains the content of 177 // /proc/meminfo. 178 bool ProcessMeminfo(const std::string& meminfo_raw); 179 180 // Parses meminfo data from |meminfo_raw|. |fields| is a vector containing 181 // the fields of interest. The order of the fields must be the same in which 182 // /proc/meminfo prints them. The result of parsing fields[i] is placed in 183 // fields[i].value. 184 bool FillMeminfo(const std::string& meminfo_raw, 185 std::vector<MeminfoRecord>* fields); 186 187 // Schedule a memory use callback in |interval| seconds. 188 void ScheduleMemuseCallback(double interval); 189 190 // Calls MemuseCallbackWork, and possibly schedules next callback, if enough 191 // active time has passed. Otherwise reschedules itself to simulate active 192 // time callbacks (i.e. wall clock time minus sleep time). 193 void MemuseCallback(); 194 195 // Reads /proc/meminfo and sends total anonymous memory usage to UMA. 196 bool MemuseCallbackWork(); 197 198 // Parses meminfo data and sends it to UMA. 199 bool ProcessMemuse(const std::string& meminfo_raw); 200 201 // Reads the current OS version from /etc/lsb-release and hashes it 202 // to a unsigned 32-bit int. 203 uint32_t GetOsVersionHash(); 204 205 // Updates stats, additionally sending them to UMA if enough time has elapsed 206 // since the last report. 207 void UpdateStats(base::TimeTicks now_ticks, base::Time now_wall_time); 208 209 // Invoked periodically by |update_stats_timeout_id_| to call UpdateStats(). 210 void HandleUpdateStatsTimeout(); 211 212 // Reports zram statistics. 213 bool ReportZram(const base::FilePath& zram_dir); 214 215 // Reads a string from a file and converts it to uint64_t. 216 static bool ReadFileToUint64(const base::FilePath& path, uint64_t* value); 217 218 // Callback invoked when a connection to weaved's service is established 219 // over Binder interface. 220 void OnWeaveServiceConnected(const std::weak_ptr<weaved::Service>& service); 221 222 // VARIABLES 223 224 // Test mode. 225 bool testing_; 226 227 // Publicly readable metrics directory. 228 base::FilePath shared_metrics_directory_; 229 230 // The metrics library handle. 231 MetricsLibraryInterface* metrics_lib_; 232 233 // The last time that UpdateStats() was called. 234 base::TimeTicks last_update_stats_time_; 235 236 // End time of current memuse stat collection interval. 237 double memuse_final_time_; 238 239 // Selects the wait time for the next memory use callback. 240 unsigned int memuse_interval_index_; 241 242 // Used internally by GetIncrementalCpuUse() to return the CPU utilization 243 // between calls. 244 base::TimeDelta latest_cpu_use_microseconds_; 245 246 // Persistent values and accumulators for crash statistics. 247 unique_ptr<PersistentInteger> daily_cycle_; 248 unique_ptr<PersistentInteger> weekly_cycle_; 249 unique_ptr<PersistentInteger> version_cycle_; 250 251 // Active use accumulated in a day. 252 unique_ptr<PersistentInteger> daily_active_use_; 253 // Active use accumulated since the latest version update. 254 unique_ptr<PersistentInteger> version_cumulative_active_use_; 255 256 // The CPU time accumulator. This contains the CPU time, in milliseconds, 257 // used by the system since the most recent OS version update. 258 unique_ptr<PersistentInteger> version_cumulative_cpu_use_; 259 260 unique_ptr<PersistentInteger> user_crash_interval_; 261 unique_ptr<PersistentInteger> kernel_crash_interval_; 262 unique_ptr<PersistentInteger> unclean_shutdown_interval_; 263 264 unique_ptr<PersistentInteger> any_crashes_daily_count_; 265 unique_ptr<PersistentInteger> any_crashes_weekly_count_; 266 unique_ptr<PersistentInteger> user_crashes_daily_count_; 267 unique_ptr<PersistentInteger> user_crashes_weekly_count_; 268 unique_ptr<PersistentInteger> kernel_crashes_daily_count_; 269 unique_ptr<PersistentInteger> kernel_crashes_weekly_count_; 270 unique_ptr<PersistentInteger> kernel_crashes_version_count_; 271 unique_ptr<PersistentInteger> unclean_shutdowns_daily_count_; 272 unique_ptr<PersistentInteger> unclean_shutdowns_weekly_count_; 273 274 unique_ptr<CpuUsageCollector> cpu_usage_collector_; 275 unique_ptr<DiskUsageCollector> disk_usage_collector_; 276 unique_ptr<AveragedStatisticsCollector> averaged_stats_collector_; 277 278 unique_ptr<weaved::Service::Subscription> weave_service_subscription_; 279 std::weak_ptr<weaved::Service> service_; 280 281 base::WeakPtrFactory<MetricsCollector> weak_ptr_factory_{this}; 282 }; 283 284 #endif // METRICS_METRICS_COLLECTOR_H_ 285