1 /**
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
18 #define WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
19
20 #include <android-base/chrono_utils.h>
21 #include <android-base/result.h>
22 #include <android/content/pm/IPackageManagerNative.h>
23 #include <cutils/multiuser.h>
24 #include <gtest/gtest_prod.h>
25 #include <time.h>
26 #include <utils/Errors.h>
27 #include <utils/Looper.h>
28 #include <utils/Mutex.h>
29 #include <utils/String16.h>
30 #include <utils/StrongPointer.h>
31 #include <utils/Vector.h>
32
33 #include <string>
34 #include <thread>
35 #include <unordered_map>
36 #include <unordered_set>
37 #include <vector>
38
39 #include "LooperWrapper.h"
40 #include "ProcPidStat.h"
41 #include "ProcStat.h"
42 #include "UidIoStats.h"
43
44 namespace android {
45 namespace automotive {
46 namespace watchdog {
47
// Command-line flags understood by the custom collection entry point
// (see |IoPerfCollection::onCustomCollection|).

// Requests the start of a custom collection.
constexpr const char* const kStartCustomCollectionFlag = "--start_io";
// Requests the end of the running custom collection.
constexpr const char* const kEndCustomCollectionFlag = "--stop_io";
// Introduces the collection interval of a custom collection.
constexpr const char* const kIntervalFlag = "--interval";
// Introduces the maximum duration of a custom collection.
constexpr const char* const kMaxDurationFlag = "--max_duration";
// Introduces the package names to which the custom collection results are filtered.
constexpr const char* const kFilterPackagesFlag = "--filter_packages";
53
54 // Performance data collected from the `/proc/uid_io/stats` file.
55 struct UidIoPerfData {
56 struct Stats {
57 userid_t userId = 0;
58 std::string packageName;
59 uint64_t bytes[UID_STATES];
60 uint64_t fsync[UID_STATES];
61 };
62 std::vector<Stats> topNReads = {};
63 std::vector<Stats> topNWrites = {};
64 uint64_t total[METRIC_TYPES][UID_STATES] = {{0}};
65 };
66
67 std::string toString(const UidIoPerfData& perfData);
68
// Performance data collected from the `/proc/stat` file.
struct SystemIoPerfData {
    uint64_t cpuIoWaitTime{0};
    uint64_t totalCpuTime{0};
    uint32_t ioBlockedProcessesCnt{0};
    uint32_t totalProcessesCnt{0};
};

// Returns a string representation of |perfData|.
std::string toString(const SystemIoPerfData& perfData);
78
79 // Performance data collected from the `/proc/[pid]/stat` and `/proc/[pid]/task/[tid]/stat` files.
80 struct ProcessIoPerfData {
81 struct UidStats {
82 userid_t userId = 0;
83 std::string packageName;
84 uint64_t count = 0;
85 struct ProcessStats {
86 std::string comm = "";
87 uint64_t count = 0;
88 };
89 std::vector<ProcessStats> topNProcesses = {};
90 };
91 std::vector<UidStats> topNIoBlockedUids = {};
92 // Total # of tasks owned by each UID in |topNIoBlockedUids|.
93 std::vector<uint64_t> topNIoBlockedUidsTotalTaskCnt = {};
94 std::vector<UidStats> topNMajorFaultUids = {};
95 uint64_t totalMajorFaults = 0;
96 // Percentage of increase/decrease in the major page faults since last collection.
97 double majorFaultsPercentChange = 0.0;
98 };
99
100 std::string toString(const ProcessIoPerfData& data);
101
102 struct IoPerfRecord {
103 time_t time; // Collection time.
104 UidIoPerfData uidIoPerfData;
105 SystemIoPerfData systemIoPerfData;
106 ProcessIoPerfData processIoPerfData;
107 };
108
109 std::string toString(const IoPerfRecord& record);
110
111 struct CollectionInfo {
112 std::chrono::nanoseconds interval = 0ns; // Collection interval between subsequent collections.
113 size_t maxCacheSize = 0; // Maximum cache size for the collection.
114 std::unordered_set<std::string> filterPackages; // Filter the output only to the specified
115 // packages.
116 nsecs_t lastCollectionUptime = 0; // Used to calculate the uptime for next collection.
117 std::vector<IoPerfRecord> records; // Cache of collected performance records.
118 };
119
120 std::string toString(const CollectionInfo& collectionInfo);
121
// Collection events tracked by |IoPerfCollection|. The values are spelled out because
// |SwitchEvent| continues numbering right after |LAST_EVENT|.
enum CollectionEvent {
    INIT = 0,
    BOOT_TIME = 1,
    PERIODIC = 2,
    CUSTOM = 3,
    TERMINATED = 4,
    // Marks the end of the collection events; not a collection event by itself.
    LAST_EVENT = 5,
};
130
131 enum SwitchEvent {
132 // Ends boot-time collection by collecting the last boot-time record and switching the
133 // collection event to periodic collection.
134 END_BOOTTIME_COLLECTION = CollectionEvent::LAST_EVENT + 1,
135 // Ends custom collection, discards collected data and starts periodic collection.
136 END_CUSTOM_COLLECTION
137 };
138
toString(CollectionEvent event)139 static inline std::string toString(CollectionEvent event) {
140 switch (event) {
141 case CollectionEvent::INIT:
142 return "INIT";
143 case CollectionEvent::BOOT_TIME:
144 return "BOOT_TIME";
145 case CollectionEvent::PERIODIC:
146 return "PERIODIC";
147 case CollectionEvent::CUSTOM:
148 return "CUSTOM";
149 case CollectionEvent::TERMINATED:
150 return "TERMINATED";
151 default:
152 return "INVALID";
153 }
154 }
155
156 // IoPerfCollection implements the I/O performance data collection module of the CarWatchDog
157 // service. It exposes APIs that the CarWatchDog main thread and binder service can call to start
158 // a collection, update the collection type, and generate collection dumps.
159 class IoPerfCollection : public MessageHandler {
160 public:
IoPerfCollection()161 IoPerfCollection() :
162 mHandlerLooper(new LooperWrapper()),
163 mBoottimeCollection({}),
164 mPeriodicCollection({}),
165 mCustomCollection({}),
166 mCurrCollectionEvent(CollectionEvent::INIT),
167 mUidToPackageNameMapping({}),
168 mUidIoStats(new UidIoStats()),
169 mProcStat(new ProcStat()),
170 mProcPidStat(new ProcPidStat()),
171 mLastMajorFaults(0) {}
172
~IoPerfCollection()173 ~IoPerfCollection() { terminate(); }
174
175 // Starts the boot-time collection in the looper handler on a collection thread and returns
176 // immediately. Must be called only once. Otherwise, returns an error.
177 android::base::Result<void> start();
178
179 // Terminates the collection thread and returns.
180 void terminate();
181
182 // Ends the boot-time collection, caches boot-time perf records, sends message to the looper to
183 // begin the periodic collection, and returns immediately.
184 virtual android::base::Result<void> onBootFinished();
185
186 // Depending the arguments, it either:
187 // 1. Starts custom collection.
188 // 2. Ends custom collection and dumps the collected data.
189 // Returns any error observed during the dump generation.
190 virtual android::base::Result<void> onCustomCollection(int fd, const Vector<String16>& args);
191
192 // Generates a dump from the boot-time and periodic collection events.
193 virtual android::base::Result<void> onDump(int fd);
194
195 // Dumps the help text.
196 bool dumpHelpText(int fd);
197
198 private:
199 // Dumps the collectors' status when they are disabled.
200 android::base::Result<void> dumpCollectorsStatusLocked(int fd);
201
202 // Starts a custom collection on the looper handler, temporarily stops the periodic collection
203 // (won't discard the collected data), and returns immediately. Returns any error observed
204 // during this process. The custom collection happens once every |interval| seconds. When the
205 // |maxDuration| is reached, the looper receives a message to end the collection, discards the
206 // collected data, and starts the periodic collection. This is needed to ensure the custom
207 // collection doesn't run forever when a subsequent |endCustomCollection| call is not received.
208 // When |kFilterPackagesFlag| value is provided, the results are filtered only to the specified
209 // package names.
210 android::base::Result<void> startCustomCollection(
211 std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
212 const std::unordered_set<std::string>& filterPackages);
213
214 // Ends the current custom collection, generates a dump, sends message to looper to start the
215 // periodic collection, and returns immediately. Returns an error when there is no custom
216 // collection running or when a dump couldn't be generated from the custom collection.
217 android::base::Result<void> endCustomCollection(int fd);
218
219 // Handles the messages received by the lopper.
220 void handleMessage(const Message& message) override;
221
222 // Processes the events received by |handleMessage|.
223 android::base::Result<void> processCollectionEvent(CollectionEvent event, CollectionInfo* info);
224
225 // Collects/stores the performance data for the current collection event.
226 android::base::Result<void> collectLocked(CollectionInfo* collectionInfo);
227
228 // Collects performance data from the `/proc/uid_io/stats` file.
229 android::base::Result<void> collectUidIoPerfDataLocked(const CollectionInfo& collectionInfo,
230 UidIoPerfData* uidIoPerfData);
231
232 // Collects performance data from the `/proc/stats` file.
233 android::base::Result<void> collectSystemIoPerfDataLocked(SystemIoPerfData* systemIoPerfData);
234
235 // Collects performance data from the `/proc/[pid]/stat` and
236 // `/proc/[pid]/task/[tid]/stat` files.
237 android::base::Result<void> collectProcessIoPerfDataLocked(
238 const CollectionInfo& collectionInfo, ProcessIoPerfData* processIoPerfData);
239
240 // Updates the |mUidToPackageNameMapping| for the given |uids|.
241 android::base::Result<void> updateUidToPackageNameMapping(
242 const std::unordered_set<uint32_t>& uids);
243
244 // Retrieves package manager from the default service manager.
245 android::base::Result<void> retrievePackageManager();
246
247 // Top N per-UID stats per category.
248 int mTopNStatsPerCategory;
249
250 // Top N per-process stats per subcategory.
251 int mTopNStatsPerSubcategory;
252
253 // Thread on which the actual collection happens.
254 std::thread mCollectionThread;
255
256 // Makes sure only one collection is running at any given time.
257 Mutex mMutex;
258
259 // Handler lopper to execute different collection events on the collection thread.
260 android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex);
261
262 // Info for the |CollectionEvent::BOOT_TIME| collection event. The cache is persisted until
263 // system shutdown/reboot.
264 CollectionInfo mBoottimeCollection GUARDED_BY(mMutex);
265
266 // Info for the |CollectionEvent::PERIODIC| collection event. The cache size is limited by
267 // |ro.carwatchdog.periodic_collection_buffer_size|.
268 CollectionInfo mPeriodicCollection GUARDED_BY(mMutex);
269
270 // Info for the |CollectionEvent::CUSTOM| collection event. The info is cleared at the end of
271 // every custom collection.
272 CollectionInfo mCustomCollection GUARDED_BY(mMutex);
273
274 // Tracks the current collection event. Updated on |start|, |onBootComplete|,
275 // |startCustomCollection| and |endCustomCollection|.
276 CollectionEvent mCurrCollectionEvent GUARDED_BY(mMutex);
277
278 // Cache of uid to package name mapping.
279 std::unordered_map<uint64_t, std::string> mUidToPackageNameMapping GUARDED_BY(mMutex);
280
281 // Collector/parser for `/proc/uid_io/stats`.
282 android::sp<UidIoStats> mUidIoStats GUARDED_BY(mMutex);
283
284 // Collector/parser for `/proc/stat`.
285 android::sp<ProcStat> mProcStat GUARDED_BY(mMutex);
286
287 // Collector/parser for `/proc/PID/*` stat files.
288 android::sp<ProcPidStat> mProcPidStat GUARDED_BY(mMutex);
289
290 // Major faults delta from last collection. Useful when calculating the percentage change in
291 // major faults since last collection.
292 uint64_t mLastMajorFaults GUARDED_BY(mMutex);
293
294 // To get the package names from app uids.
295 android::sp<android::content::pm::IPackageManagerNative> mPackageManager GUARDED_BY(mMutex);
296
297 FRIEND_TEST(IoPerfCollectionTest, TestCollectionStartAndTerminate);
298 FRIEND_TEST(IoPerfCollectionTest, TestValidCollectionSequence);
299 FRIEND_TEST(IoPerfCollectionTest, TestCollectionTerminatesOnZeroEnabledCollectors);
300 FRIEND_TEST(IoPerfCollectionTest, TestCollectionTerminatesOnError);
301 FRIEND_TEST(IoPerfCollectionTest, TestCustomCollectionTerminatesAfterMaxDuration);
302 FRIEND_TEST(IoPerfCollectionTest, TestValidUidIoStatFile);
303 FRIEND_TEST(IoPerfCollectionTest, TestUidIOStatsLessThanTopNStatsLimit);
304 FRIEND_TEST(IoPerfCollectionTest, TestProcUidIoStatsContentsFromDevice);
305 FRIEND_TEST(IoPerfCollectionTest, TestValidProcStatFile);
306 FRIEND_TEST(IoPerfCollectionTest, TestValidProcPidContents);
307 FRIEND_TEST(IoPerfCollectionTest, TestProcPidContentsLessThanTopNStatsLimit);
308 FRIEND_TEST(IoPerfCollectionTest, TestCustomCollectionFiltersPackageNames);
309 };
310
311 } // namespace watchdog
312 } // namespace automotive
313 } // namespace android
314
315 #endif // WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
316