• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
18 #define WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
19 
20 #include <android-base/chrono_utils.h>
21 #include <android-base/result.h>
22 #include <android/content/pm/IPackageManagerNative.h>
23 #include <cutils/multiuser.h>
24 #include <gtest/gtest_prod.h>
25 #include <time.h>
26 #include <utils/Errors.h>
27 #include <utils/Looper.h>
28 #include <utils/Mutex.h>
29 #include <utils/String16.h>
30 #include <utils/StrongPointer.h>
31 #include <utils/Vector.h>
32 
33 #include <string>
34 #include <thread>
35 #include <unordered_map>
36 #include <unordered_set>
37 #include <vector>
38 
39 #include "LooperWrapper.h"
40 #include "ProcPidStat.h"
41 #include "ProcStat.h"
42 #include "UidIoStats.h"
43 
44 namespace android {
45 namespace automotive {
46 namespace watchdog {
47 
// Flag strings related to custom collection.
// NOTE(review): presumably these are matched against the |args| passed to
// IoPerfCollection::onCustomCollection; the parsing code is not visible in this header - confirm.
// Flag that requests the start of a custom collection.
constexpr const char* kStartCustomCollectionFlag = "--start_io";
// Flag that requests the end of a custom collection.
constexpr const char* kEndCustomCollectionFlag = "--stop_io";
// Flag carrying the collection interval.
constexpr const char* kIntervalFlag = "--interval";
// Flag carrying the maximum duration of a custom collection.
constexpr const char* kMaxDurationFlag = "--max_duration";
// Flag carrying the package names that the results should be filtered to.
constexpr const char* kFilterPackagesFlag = "--filter_packages";
53 
54 // Performance data collected from the `/proc/uid_io/stats` file.
55 struct UidIoPerfData {
56     struct Stats {
57         userid_t userId = 0;
58         std::string packageName;
59         uint64_t bytes[UID_STATES];
60         uint64_t fsync[UID_STATES];
61     };
62     std::vector<Stats> topNReads = {};
63     std::vector<Stats> topNWrites = {};
64     uint64_t total[METRIC_TYPES][UID_STATES] = {{0}};
65 };
66 
67 std::string toString(const UidIoPerfData& perfData);
68 
// System-wide performance data collected from the `/proc/stat` file.
// Every counter starts at zero.
struct SystemIoPerfData {
    uint64_t cpuIoWaitTime{0};          // CPU I/O wait time.
    uint64_t totalCpuTime{0};           // Total CPU time.
    uint32_t ioBlockedProcessesCnt{0};  // Number of I/O blocked processes.
    uint32_t totalProcessesCnt{0};      // Total number of processes.
};
76 
77 std::string toString(const SystemIoPerfData& perfData);
78 
79 // Performance data collected from the `/proc/[pid]/stat` and `/proc/[pid]/task/[tid]/stat` files.
80 struct ProcessIoPerfData {
81     struct UidStats {
82         userid_t userId = 0;
83         std::string packageName;
84         uint64_t count = 0;
85         struct ProcessStats {
86             std::string comm = "";
87             uint64_t count = 0;
88         };
89         std::vector<ProcessStats> topNProcesses = {};
90     };
91     std::vector<UidStats> topNIoBlockedUids = {};
92     // Total # of tasks owned by each UID in |topNIoBlockedUids|.
93     std::vector<uint64_t> topNIoBlockedUidsTotalTaskCnt = {};
94     std::vector<UidStats> topNMajorFaultUids = {};
95     uint64_t totalMajorFaults = 0;
96     // Percentage of increase/decrease in the major page faults since last collection.
97     double majorFaultsPercentChange = 0.0;
98 };
99 
100 std::string toString(const ProcessIoPerfData& data);
101 
102 struct IoPerfRecord {
103     time_t time;  // Collection time.
104     UidIoPerfData uidIoPerfData;
105     SystemIoPerfData systemIoPerfData;
106     ProcessIoPerfData processIoPerfData;
107 };
108 
109 std::string toString(const IoPerfRecord& record);
110 
111 struct CollectionInfo {
112     std::chrono::nanoseconds interval = 0ns;  // Collection interval between subsequent collections.
113     size_t maxCacheSize = 0;                  // Maximum cache size for the collection.
114     std::unordered_set<std::string> filterPackages;  // Filter the output only to the specified
115                                                      // packages.
116     nsecs_t lastCollectionUptime = 0;         // Used to calculate the uptime for next collection.
117     std::vector<IoPerfRecord> records;        // Cache of collected performance records.
118 };
119 
120 std::string toString(const CollectionInfo& collectionInfo);
121 
// Collection events tracked by the I/O performance collection module.
// The numeric values are spelled out because |SwitchEvent| derives its first value from
// |LAST_EVENT|.
enum CollectionEvent {
    INIT = 0,
    BOOT_TIME = 1,
    PERIODIC = 2,
    CUSTOM = 3,
    TERMINATED = 4,
    // One past the last real event.
    LAST_EVENT = 5,
};
130 
131 enum SwitchEvent {
132     // Ends boot-time collection by collecting the last boot-time record and switching the
133     // collection event to periodic collection.
134     END_BOOTTIME_COLLECTION = CollectionEvent::LAST_EVENT + 1,
135     // Ends custom collection, discards collected data and starts periodic collection.
136     END_CUSTOM_COLLECTION
137 };
138 
toString(CollectionEvent event)139 static inline std::string toString(CollectionEvent event) {
140     switch (event) {
141         case CollectionEvent::INIT:
142             return "INIT";
143         case CollectionEvent::BOOT_TIME:
144             return "BOOT_TIME";
145         case CollectionEvent::PERIODIC:
146             return "PERIODIC";
147         case CollectionEvent::CUSTOM:
148             return "CUSTOM";
149         case CollectionEvent::TERMINATED:
150             return "TERMINATED";
151         default:
152             return "INVALID";
153     }
154 }
155 
156 // IoPerfCollection implements the I/O performance data collection module of the CarWatchDog
157 // service. It exposes APIs that the CarWatchDog main thread and binder service can call to start
158 // a collection, update the collection type, and generate collection dumps.
159 class IoPerfCollection : public MessageHandler {
160 public:
IoPerfCollection()161     IoPerfCollection() :
162           mHandlerLooper(new LooperWrapper()),
163           mBoottimeCollection({}),
164           mPeriodicCollection({}),
165           mCustomCollection({}),
166           mCurrCollectionEvent(CollectionEvent::INIT),
167           mUidToPackageNameMapping({}),
168           mUidIoStats(new UidIoStats()),
169           mProcStat(new ProcStat()),
170           mProcPidStat(new ProcPidStat()),
171           mLastMajorFaults(0) {}
172 
~IoPerfCollection()173     ~IoPerfCollection() { terminate(); }
174 
175     // Starts the boot-time collection in the looper handler on a collection thread and returns
176     // immediately. Must be called only once. Otherwise, returns an error.
177     android::base::Result<void> start();
178 
179     // Terminates the collection thread and returns.
180     void terminate();
181 
182     // Ends the boot-time collection, caches boot-time perf records, sends message to the looper to
183     // begin the periodic collection, and returns immediately.
184     virtual android::base::Result<void> onBootFinished();
185 
186     // Depending the arguments, it either:
187     // 1. Starts custom collection.
188     // 2. Ends custom collection and dumps the collected data.
189     // Returns any error observed during the dump generation.
190     virtual android::base::Result<void> onCustomCollection(int fd, const Vector<String16>& args);
191 
192     // Generates a dump from the boot-time and periodic collection events.
193     virtual android::base::Result<void> onDump(int fd);
194 
195     // Dumps the help text.
196     bool dumpHelpText(int fd);
197 
198 private:
199     // Dumps the collectors' status when they are disabled.
200     android::base::Result<void> dumpCollectorsStatusLocked(int fd);
201 
202     // Starts a custom collection on the looper handler, temporarily stops the periodic collection
203     // (won't discard the collected data), and returns immediately. Returns any error observed
204     // during this process. The custom collection happens once every |interval| seconds. When the
205     // |maxDuration| is reached, the looper receives a message to end the collection, discards the
206     // collected data, and starts the periodic collection. This is needed to ensure the custom
207     // collection doesn't run forever when a subsequent |endCustomCollection| call is not received.
208     // When |kFilterPackagesFlag| value is provided, the results are filtered only to the specified
209     // package names.
210     android::base::Result<void> startCustomCollection(
211             std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
212             const std::unordered_set<std::string>& filterPackages);
213 
214     // Ends the current custom collection, generates a dump, sends message to looper to start the
215     // periodic collection, and returns immediately. Returns an error when there is no custom
216     // collection running or when a dump couldn't be generated from the custom collection.
217     android::base::Result<void> endCustomCollection(int fd);
218 
219     // Handles the messages received by the lopper.
220     void handleMessage(const Message& message) override;
221 
222     // Processes the events received by |handleMessage|.
223     android::base::Result<void> processCollectionEvent(CollectionEvent event, CollectionInfo* info);
224 
225     // Collects/stores the performance data for the current collection event.
226     android::base::Result<void> collectLocked(CollectionInfo* collectionInfo);
227 
228     // Collects performance data from the `/proc/uid_io/stats` file.
229     android::base::Result<void> collectUidIoPerfDataLocked(const CollectionInfo& collectionInfo,
230                                                            UidIoPerfData* uidIoPerfData);
231 
232     // Collects performance data from the `/proc/stats` file.
233     android::base::Result<void> collectSystemIoPerfDataLocked(SystemIoPerfData* systemIoPerfData);
234 
235     // Collects performance data from the `/proc/[pid]/stat` and
236     // `/proc/[pid]/task/[tid]/stat` files.
237     android::base::Result<void> collectProcessIoPerfDataLocked(
238             const CollectionInfo& collectionInfo, ProcessIoPerfData* processIoPerfData);
239 
240     // Updates the |mUidToPackageNameMapping| for the given |uids|.
241     android::base::Result<void> updateUidToPackageNameMapping(
242             const std::unordered_set<uint32_t>& uids);
243 
244     // Retrieves package manager from the default service manager.
245     android::base::Result<void> retrievePackageManager();
246 
247     // Top N per-UID stats per category.
248     int mTopNStatsPerCategory;
249 
250     // Top N per-process stats per subcategory.
251     int mTopNStatsPerSubcategory;
252 
253     // Thread on which the actual collection happens.
254     std::thread mCollectionThread;
255 
256     // Makes sure only one collection is running at any given time.
257     Mutex mMutex;
258 
259     // Handler lopper to execute different collection events on the collection thread.
260     android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex);
261 
262     // Info for the |CollectionEvent::BOOT_TIME| collection event. The cache is persisted until
263     // system shutdown/reboot.
264     CollectionInfo mBoottimeCollection GUARDED_BY(mMutex);
265 
266     // Info for the |CollectionEvent::PERIODIC| collection event. The cache size is limited by
267     // |ro.carwatchdog.periodic_collection_buffer_size|.
268     CollectionInfo mPeriodicCollection GUARDED_BY(mMutex);
269 
270     // Info for the |CollectionEvent::CUSTOM| collection event. The info is cleared at the end of
271     // every custom collection.
272     CollectionInfo mCustomCollection GUARDED_BY(mMutex);
273 
274     // Tracks the current collection event. Updated on |start|, |onBootComplete|,
275     // |startCustomCollection| and |endCustomCollection|.
276     CollectionEvent mCurrCollectionEvent GUARDED_BY(mMutex);
277 
278     // Cache of uid to package name mapping.
279     std::unordered_map<uint64_t, std::string> mUidToPackageNameMapping GUARDED_BY(mMutex);
280 
281     // Collector/parser for `/proc/uid_io/stats`.
282     android::sp<UidIoStats> mUidIoStats GUARDED_BY(mMutex);
283 
284     // Collector/parser for `/proc/stat`.
285     android::sp<ProcStat> mProcStat GUARDED_BY(mMutex);
286 
287     // Collector/parser for `/proc/PID/*` stat files.
288     android::sp<ProcPidStat> mProcPidStat GUARDED_BY(mMutex);
289 
290     // Major faults delta from last collection. Useful when calculating the percentage change in
291     // major faults since last collection.
292     uint64_t mLastMajorFaults GUARDED_BY(mMutex);
293 
294     // To get the package names from app uids.
295     android::sp<android::content::pm::IPackageManagerNative> mPackageManager GUARDED_BY(mMutex);
296 
297     FRIEND_TEST(IoPerfCollectionTest, TestCollectionStartAndTerminate);
298     FRIEND_TEST(IoPerfCollectionTest, TestValidCollectionSequence);
299     FRIEND_TEST(IoPerfCollectionTest, TestCollectionTerminatesOnZeroEnabledCollectors);
300     FRIEND_TEST(IoPerfCollectionTest, TestCollectionTerminatesOnError);
301     FRIEND_TEST(IoPerfCollectionTest, TestCustomCollectionTerminatesAfterMaxDuration);
302     FRIEND_TEST(IoPerfCollectionTest, TestValidUidIoStatFile);
303     FRIEND_TEST(IoPerfCollectionTest, TestUidIOStatsLessThanTopNStatsLimit);
304     FRIEND_TEST(IoPerfCollectionTest, TestProcUidIoStatsContentsFromDevice);
305     FRIEND_TEST(IoPerfCollectionTest, TestValidProcStatFile);
306     FRIEND_TEST(IoPerfCollectionTest, TestValidProcPidContents);
307     FRIEND_TEST(IoPerfCollectionTest, TestProcPidContentsLessThanTopNStatsLimit);
308     FRIEND_TEST(IoPerfCollectionTest, TestCustomCollectionFiltersPackageNames);
309 };
310 
311 }  // namespace watchdog
312 }  // namespace automotive
313 }  // namespace android
314 
315 #endif  //  WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
316