• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef METRICS_METRICS_COLLECTOR_H_
18 #define METRICS_METRICS_COLLECTOR_H_
19 
20 #include <stdint.h>
21 
22 #include <map>
23 #include <memory>
24 #include <string>
25 #include <vector>
26 
27 #include <base/files/file_path.h>
28 #include <base/memory/weak_ptr.h>
29 #include <base/time/time.h>
30 #include <brillo/binder_watcher.h>
31 #include <brillo/daemons/daemon.h>
32 #include <libweaved/command.h>
33 #include <libweaved/service.h>
34 #include <gtest/gtest_prod.h>  // for FRIEND_TEST
35 
36 #include "collectors/averaged_statistics_collector.h"
37 #include "collectors/cpu_usage_collector.h"
38 #include "collectors/disk_usage_collector.h"
39 #include "metrics/metrics_library.h"
40 #include "persistent_integer.h"
41 
42 using chromeos_metrics::PersistentInteger;
43 using std::unique_ptr;
44 
45 class MetricsCollector : public brillo::Daemon {
46  public:
47   MetricsCollector();
48   ~MetricsCollector();
49 
50   // Initializes metrics class variables.
51   void Init(bool testing,
52             MetricsLibraryInterface* metrics_lib,
53             const std::string& diskstats_path,
54             const base::FilePath& private_metrics_directory,
55             const base::FilePath& shared_metrics_directory);
56 
57   // Initializes the daemon.
58   int OnInit() override;
59 
60   // Does all the work.
61   int Run() override;
62 
63   // Returns the active time since boot (uptime minus sleep time) in seconds.
64   static double GetActiveTime();
65 
66   // Updates the active use time and logs time between user-space
67   // process crashes.  Called via MetricsCollectorServiceTrampoline.
68   void ProcessUserCrash();
69 
70  protected:
71   // Used also by the unit tests.
72   static const char kComprDataSizeName[];
73   static const char kOrigDataSizeName[];
74   static const char kZeroPagesName[];
75 
76  private:
77   friend class MetricsCollectorTest;
78   FRIEND_TEST(MetricsCollectorTest, CheckSystemCrash);
79   FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoCurrent);
80   FRIEND_TEST(MetricsCollectorTest, ComputeEpochNoLast);
81   FRIEND_TEST(MetricsCollectorTest, GetHistogramPath);
82   FRIEND_TEST(MetricsCollectorTest, IsNewEpoch);
83   FRIEND_TEST(MetricsCollectorTest, MessageFilter);
84   FRIEND_TEST(MetricsCollectorTest, ProcessKernelCrash);
85   FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo);
86   FRIEND_TEST(MetricsCollectorTest, ProcessMeminfo2);
87   FRIEND_TEST(MetricsCollectorTest, ProcessUncleanShutdown);
88   FRIEND_TEST(MetricsCollectorTest, ProcessUserCrash);
89   FRIEND_TEST(MetricsCollectorTest, ReportCrashesDailyFrequency);
90   FRIEND_TEST(MetricsCollectorTest, ReportKernelCrashInterval);
91   FRIEND_TEST(MetricsCollectorTest, ReportUncleanShutdownInterval);
92   FRIEND_TEST(MetricsCollectorTest, ReportUserCrashInterval);
93   FRIEND_TEST(MetricsCollectorTest, SendSample);
94   FRIEND_TEST(MetricsCollectorTest, SendZramMetrics);
95 
96   // Type of scale to use for meminfo histograms.  For most of them we use
97   // percent of total RAM, but for some we use absolute numbers, usually in
98   // megabytes, on a log scale from 0 to 4000, and 0 to 8000 for compressed
99   // swap (since it can be larger than total RAM).
100   enum MeminfoOp {
101     kMeminfoOp_HistPercent = 0,
102     kMeminfoOp_HistLog,
103     kMeminfoOp_SwapTotal,
104     kMeminfoOp_SwapFree,
105   };
106 
107   // Record for retrieving and reporting values from /proc/meminfo.
108   struct MeminfoRecord {
109     const char* name;        // print name
110     const char* match;       // string to match in output of /proc/meminfo
111     MeminfoOp op;            // histogram scale selector, or other operator
112     int value;               // value from /proc/meminfo
113   };
114 
115   // Enables metrics reporting.
116   void OnEnableMetrics(std::unique_ptr<weaved::Command> command);
117 
118   // Disables metrics reporting.
119   void OnDisableMetrics(std::unique_ptr<weaved::Command> command);
120 
121   // Updates the weave device state.
122   void UpdateWeaveState();
123 
124   // Updates the active use time and logs time between kernel crashes.
125   void ProcessKernelCrash();
126 
127   // Updates the active use time and logs time between unclean shutdowns.
128   void ProcessUncleanShutdown();
129 
130   // Checks if a kernel crash has been detected and returns true if
131   // so.  The method assumes that a kernel crash has happened if
132   // |crash_file| exists.  It removes the file immediately if it
133   // exists, so it must not be called more than once.
134   bool CheckSystemCrash(const std::string& crash_file);
135 
136   // Sends a regular (exponential) histogram sample to Chrome for
137   // transport to UMA. See MetricsLibrary::SendToUMA in
138   // metrics_library.h for a description of the arguments.
139   void SendSample(const std::string& name, int sample,
140                   int min, int max, int nbuckets);
141 
142   // Sends a linear histogram sample to Chrome for transport to UMA. See
143   // MetricsLibrary::SendToUMA in metrics_library.h for a description of the
144   // arguments.
145   void SendLinearSample(const std::string& name, int sample,
146                         int max, int nbuckets);
147 
148   // Sends various cumulative kernel crash-related stats, for instance the
149   // total number of kernel crashes since the last version update.
150   void SendKernelCrashesCumulativeCountStats();
151 
152   // Sends a sample representing the number of seconds of active use
153   // for a 24-hour period and reset |use|.
154   void SendAndResetDailyUseSample(const unique_ptr<PersistentInteger>& use);
155 
156   // Sends a sample representing a time interval between two crashes of the
157   // same type and reset |interval|.
158   void SendAndResetCrashIntervalSample(
159       const unique_ptr<PersistentInteger>& interval);
160 
161   // Sends a sample representing a frequency of crashes of some type and reset
162   // |frequency|.
163   void SendAndResetCrashFrequencySample(
164       const unique_ptr<PersistentInteger>& frequency);
165 
166   // Initializes vm and disk stats reporting.
167   void StatsReporterInit();
168 
169   // Schedules meminfo collection callback.
170   void ScheduleMeminfoCallback(int wait);
171 
172   // Reports memory statistics.  Reschedules callback on success.
173   void MeminfoCallback(base::TimeDelta wait);
174 
175   // Parses content of /proc/meminfo and sends fields of interest to UMA.
176   // Returns false on errors.  |meminfo_raw| contains the content of
177   // /proc/meminfo.
178   bool ProcessMeminfo(const std::string& meminfo_raw);
179 
180   // Parses meminfo data from |meminfo_raw|.  |fields| is a vector containing
181   // the fields of interest.  The order of the fields must be the same in which
182   // /proc/meminfo prints them.  The result of parsing fields[i] is placed in
183   // fields[i].value.
184   bool FillMeminfo(const std::string& meminfo_raw,
185                    std::vector<MeminfoRecord>* fields);
186 
187   // Schedule a memory use callback in |interval| seconds.
188   void ScheduleMemuseCallback(double interval);
189 
190   // Calls MemuseCallbackWork, and possibly schedules next callback, if enough
191   // active time has passed.  Otherwise reschedules itself to simulate active
192   // time callbacks (i.e. wall clock time minus sleep time).
193   void MemuseCallback();
194 
195   // Reads /proc/meminfo and sends total anonymous memory usage to UMA.
196   bool MemuseCallbackWork();
197 
198   // Parses meminfo data and sends it to UMA.
199   bool ProcessMemuse(const std::string& meminfo_raw);
200 
201   // Reads the current OS version from /etc/lsb-release and hashes it
202   // to a unsigned 32-bit int.
203   uint32_t GetOsVersionHash();
204 
205   // Updates stats, additionally sending them to UMA if enough time has elapsed
206   // since the last report.
207   void UpdateStats(base::TimeTicks now_ticks, base::Time now_wall_time);
208 
209   // Invoked periodically by |update_stats_timeout_id_| to call UpdateStats().
210   void HandleUpdateStatsTimeout();
211 
212   // Reports zram statistics.
213   bool ReportZram(const base::FilePath& zram_dir);
214 
215   // Reads a string from a file and converts it to uint64_t.
216   static bool ReadFileToUint64(const base::FilePath& path, uint64_t* value);
217 
218   // Callback invoked when a connection to weaved's service is established
219   // over Binder interface.
220   void OnWeaveServiceConnected(const std::weak_ptr<weaved::Service>& service);
221 
222   // VARIABLES
223 
224   // Test mode.
225   bool testing_;
226 
227   // Publicly readable metrics directory.
228   base::FilePath shared_metrics_directory_;
229 
230   // The metrics library handle.
231   MetricsLibraryInterface* metrics_lib_;
232 
233   // The last time that UpdateStats() was called.
234   base::TimeTicks last_update_stats_time_;
235 
236   // End time of current memuse stat collection interval.
237   double memuse_final_time_;
238 
239   // Selects the wait time for the next memory use callback.
240   unsigned int memuse_interval_index_;
241 
242   // Used internally by GetIncrementalCpuUse() to return the CPU utilization
243   // between calls.
244   base::TimeDelta latest_cpu_use_microseconds_;
245 
246   // Persistent values and accumulators for crash statistics.
247   unique_ptr<PersistentInteger> daily_cycle_;
248   unique_ptr<PersistentInteger> weekly_cycle_;
249   unique_ptr<PersistentInteger> version_cycle_;
250 
251   // Active use accumulated in a day.
252   unique_ptr<PersistentInteger> daily_active_use_;
253   // Active use accumulated since the latest version update.
254   unique_ptr<PersistentInteger> version_cumulative_active_use_;
255 
256   // The CPU time accumulator.  This contains the CPU time, in milliseconds,
257   // used by the system since the most recent OS version update.
258   unique_ptr<PersistentInteger> version_cumulative_cpu_use_;
259 
260   unique_ptr<PersistentInteger> user_crash_interval_;
261   unique_ptr<PersistentInteger> kernel_crash_interval_;
262   unique_ptr<PersistentInteger> unclean_shutdown_interval_;
263 
264   unique_ptr<PersistentInteger> any_crashes_daily_count_;
265   unique_ptr<PersistentInteger> any_crashes_weekly_count_;
266   unique_ptr<PersistentInteger> user_crashes_daily_count_;
267   unique_ptr<PersistentInteger> user_crashes_weekly_count_;
268   unique_ptr<PersistentInteger> kernel_crashes_daily_count_;
269   unique_ptr<PersistentInteger> kernel_crashes_weekly_count_;
270   unique_ptr<PersistentInteger> kernel_crashes_version_count_;
271   unique_ptr<PersistentInteger> unclean_shutdowns_daily_count_;
272   unique_ptr<PersistentInteger> unclean_shutdowns_weekly_count_;
273 
274   unique_ptr<CpuUsageCollector> cpu_usage_collector_;
275   unique_ptr<DiskUsageCollector> disk_usage_collector_;
276   unique_ptr<AveragedStatisticsCollector> averaged_stats_collector_;
277 
278   unique_ptr<weaved::Service::Subscription> weave_service_subscription_;
279   std::weak_ptr<weaved::Service> service_;
280 
281   base::WeakPtrFactory<MetricsCollector> weak_ptr_factory_{this};
282 };
283 
284 #endif  // METRICS_METRICS_COLLECTOR_H_
285