• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "carwatchdogd"
18 
#include "ProcPidStat.h"

#include <android-base/file.h>
#include <android-base/parseint.h>
#include <android-base/strings.h>
#include <dirent.h>
#include <log/log.h>

#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
30 
31 namespace android {
32 namespace automotive {
33 namespace watchdog {
34 
35 using ::android::base::EndsWith;
36 using ::android::base::Error;
37 using ::android::base::ParseInt;
38 using ::android::base::ParseUint;
39 using ::android::base::ReadFileToString;
40 using ::android::base::Result;
41 using ::android::base::Split;
42 using ::android::base::Trim;
43 
44 namespace {
45 
// Error codes attached to the Result errors produced by the file readers in this file. Kept as a
// plain (unscoped) enum because the values are passed directly to Error(<code>).
enum ReadError {
    // The file was read but its contents don't match the expected format.
    ERR_INVALID_FILE = 0,
    // The file couldn't be opened or read.
    ERR_FILE_OPEN_READ,
    // Number of error codes defined above.
    NUM_ERRORS,
};
51 
52 // /proc/PID/stat or /proc/PID/task/TID/stat format:
53 // <pid> <comm> <state> <ppid> <pgrp ID> <session ID> <tty_nr> <tpgid> <flags> <minor faults>
54 // <children minor faults> <major faults> <children major faults> <user mode time>
55 // <system mode time> <children user mode time> <children kernel mode time> <priority> <nice value>
56 // <num threads> <start time since boot> <virtual memory size> <resident set size> <rss soft limit>
57 // <start code addr> <end code addr> <start stack addr> <ESP value> <EIP> <bitmap of pending sigs>
58 // <bitmap of blocked sigs> <bitmap of ignored sigs> <waiting channel> <num pages swapped>
59 // <cumulative pages swapped> <exit signal> <processor #> <real-time prio> <agg block I/O delays>
60 // <guest time> <children guest time> <start data addr> <end data addr> <start break addr>
61 // <cmd line args start addr> <amd line args end addr> <env start addr> <env end addr> <exit code>
62 // Example line: 1 (init) S 0 0 0 0 0 0 0 0 220 0 0 0 0 0 0 0 2 0 0 ...etc...
parsePidStatLine(const std::string & line,PidStat * pidStat)63 bool parsePidStatLine(const std::string& line, PidStat* pidStat) {
64     std::vector<std::string> fields = Split(line, " ");
65 
66     // Note: Regex parsing for the below logic increased the time taken to run the
67     // ProcPidStatTest#TestProcPidStatContentsFromDevice from 151.7ms to 1.3 seconds.
68 
69     // Comm string is enclosed with ( ) brackets and may contain space(s). Thus calculate the
70     // commEndOffset based on the field that contains the closing bracket.
71     size_t commEndOffset = 0;
72     for (size_t i = 1; i < fields.size(); ++i) {
73         pidStat->comm += fields[i];
74         if (EndsWith(fields[i], ")")) {
75             commEndOffset = i - 1;
76             break;
77         }
78         pidStat->comm += " ";
79     }
80 
81     if (pidStat->comm.front() != '(' || pidStat->comm.back() != ')') {
82         ALOGW("Comm string `%s` not enclosed in brackets", pidStat->comm.c_str());
83         return false;
84     }
85     pidStat->comm.erase(pidStat->comm.begin());
86     pidStat->comm.erase(pidStat->comm.end() - 1);
87 
88     // The required data is in the first 22 + |commEndOffset| fields so make sure there are at least
89     // these many fields in the file.
90     if (fields.size() < 22 + commEndOffset || !ParseInt(fields[0], &pidStat->pid) ||
91         !ParseInt(fields[3 + commEndOffset], &pidStat->ppid) ||
92         !ParseUint(fields[11 + commEndOffset], &pidStat->majorFaults) ||
93         !ParseUint(fields[19 + commEndOffset], &pidStat->numThreads) ||
94         !ParseUint(fields[21 + commEndOffset], &pidStat->startTime)) {
95         ALOGW("Invalid proc pid stat contents: \"%s\"", line.c_str());
96         return false;
97     }
98     pidStat->state = fields[2 + commEndOffset];
99     return true;
100 }
101 
readPidStatFile(const std::string & path,PidStat * pidStat)102 Result<void> readPidStatFile(const std::string& path, PidStat* pidStat) {
103     std::string buffer;
104     if (!ReadFileToString(path, &buffer)) {
105         return Error(ERR_FILE_OPEN_READ) << "ReadFileToString failed for " << path;
106     }
107     std::vector<std::string> lines = Split(std::move(buffer), "\n");
108     if (lines.size() != 1 && (lines.size() != 2 || !lines[1].empty())) {
109         return Error(ERR_INVALID_FILE) << path << " contains " << lines.size() << " lines != 1";
110     }
111     if (!parsePidStatLine(std::move(lines[0]), pidStat)) {
112         return Error(ERR_INVALID_FILE) << "Failed to parse the contents of " << path;
113     }
114     return {};
115 }
116 
readKeyValueFile(const std::string & path,const std::string & delimiter)117 Result<std::unordered_map<std::string, std::string>> readKeyValueFile(
118         const std::string& path, const std::string& delimiter) {
119     std::string buffer;
120     if (!ReadFileToString(path, &buffer)) {
121         return Error(ERR_FILE_OPEN_READ) << "ReadFileToString failed for " << path;
122     }
123     std::unordered_map<std::string, std::string> contents;
124     std::vector<std::string> lines = Split(std::move(buffer), "\n");
125     for (size_t i = 0; i < lines.size(); ++i) {
126         if (lines[i].empty()) {
127             continue;
128         }
129         std::vector<std::string> elements = Split(lines[i], delimiter);
130         if (elements.size() < 2) {
131             return Error(ERR_INVALID_FILE)
132                     << "Line \"" << lines[i] << "\" doesn't contain the delimiter \"" << delimiter
133                     << "\" in file " << path;
134         }
135         std::string key = elements[0];
136         std::string value = Trim(lines[i].substr(key.length() + delimiter.length()));
137         if (contents.find(key) != contents.end()) {
138             return Error(ERR_INVALID_FILE)
139                     << "Duplicate " << key << " line: \"" << lines[i] << "\" in file " << path;
140         }
141         contents[key] = value;
142     }
143     return contents;
144 }
145 
146 // /proc/PID/status file format(*):
147 // Tgid:    <Thread group ID of the process>
148 // Uid:     <Read UID>   <Effective UID>   <Saved set UID>   <Filesystem UID>
149 // VmPeak:  <Peak virtual memory size> kB
150 // VmSize:  <Virtual memory size> kB
151 // VmHWM:   <Peak resident set size> kB
152 // VmRSS:   <Resident set size> kB
153 //
154 // (*) - Included only the fields that are parsed from the file.
readPidStatusFile(const std::string & path,ProcessStats * processStats)155 Result<void> readPidStatusFile(const std::string& path, ProcessStats* processStats) {
156     auto ret = readKeyValueFile(path, ":\t");
157     if (!ret.ok()) {
158         return Error(ret.error().code()) << ret.error();
159     }
160     auto contents = ret.value();
161     if (contents.empty()) {
162         return Error(ERR_INVALID_FILE) << "Empty file " << path;
163     }
164     if (contents.find("Uid") == contents.end() ||
165         !ParseInt(Split(contents["Uid"], "\t")[0], &processStats->uid)) {
166         return Error(ERR_INVALID_FILE) << "Failed to read 'UIDs' from file " << path;
167     }
168     if (contents.find("Tgid") == contents.end() ||
169         !ParseInt(contents["Tgid"], &processStats->tgid)) {
170         return Error(ERR_INVALID_FILE) << "Failed to read 'Tgid' from file " << path;
171     }
172     // Below Vm* fields may not be present for some processes so don't fail when they are missing.
173     if (contents.find("VmPeak") != contents.end() &&
174         !ParseUint(Split(contents["VmPeak"], " ")[0], &processStats->vmPeakKb)) {
175         return Error(ERR_INVALID_FILE) << "Failed to parse 'VmPeak' from file " << path;
176     }
177     if (contents.find("VmSize") != contents.end() &&
178         !ParseUint(Split(contents["VmSize"], " ")[0], &processStats->vmSizeKb)) {
179         return Error(ERR_INVALID_FILE) << "Failed to parse 'VmSize' from file " << path;
180     }
181     if (contents.find("VmHWM") != contents.end() &&
182         !ParseUint(Split(contents["VmHWM"], " ")[0], &processStats->vmHwmKb)) {
183         return Error(ERR_INVALID_FILE) << "Failed to parse 'VmHWM' from file " << path;
184     }
185     if (contents.find("VmRSS") != contents.end() &&
186         !ParseUint(Split(contents["VmRSS"], " ")[0], &processStats->vmRssKb)) {
187         return Error(ERR_INVALID_FILE) << "Failed to parse 'VmRSS' from file " << path;
188     }
189     return {};
190 }
191 
192 }  // namespace
193 
collect()194 Result<void> ProcPidStat::collect() {
195     if (!mEnabled) {
196         return Error() << "Can not access PID stat files under " << kProcDirPath;
197     }
198 
199     Mutex::Autolock lock(mMutex);
200     const auto& processStats = getProcessStatsLocked();
201     if (!processStats.ok()) {
202         return Error() << processStats.error();
203     }
204 
205     mDeltaProcessStats.clear();
206     for (const auto& it : *processStats) {
207         const ProcessStats& curStats = it.second;
208         const auto& cachedIt = mLatestProcessStats.find(it.first);
209         if (cachedIt == mLatestProcessStats.end() ||
210             cachedIt->second.process.startTime != curStats.process.startTime) {
211             // New/reused PID so don't calculate the delta.
212             mDeltaProcessStats.emplace_back(curStats);
213             continue;
214         }
215 
216         ProcessStats deltaStats = curStats;
217         const ProcessStats& cachedStats = cachedIt->second;
218         deltaStats.process.majorFaults -= cachedStats.process.majorFaults;
219         for (auto& deltaThread : deltaStats.threads) {
220             const auto& cachedThread = cachedStats.threads.find(deltaThread.first);
221             if (cachedThread == cachedStats.threads.end() ||
222                 cachedThread->second.startTime != deltaThread.second.startTime) {
223                 // New TID or TID reused by the same PID so don't calculate the delta.
224                 continue;
225             }
226             deltaThread.second.majorFaults -= cachedThread->second.majorFaults;
227         }
228         mDeltaProcessStats.emplace_back(deltaStats);
229     }
230     mLatestProcessStats = *processStats;
231     return {};
232 }
233 
getProcessStatsLocked() const234 Result<std::unordered_map<pid_t, ProcessStats>> ProcPidStat::getProcessStatsLocked() const {
235     std::unordered_map<pid_t, ProcessStats> processStats;
236     auto procDirp = std::unique_ptr<DIR, int (*)(DIR*)>(opendir(mPath.c_str()), closedir);
237     if (!procDirp) {
238         return Error() << "Failed to open " << mPath << " directory";
239     }
240     dirent* pidDir = nullptr;
241     while ((pidDir = readdir(procDirp.get())) != nullptr) {
242         // 1. Read top-level pid stats.
243         pid_t pid = 0;
244         if (pidDir->d_type != DT_DIR || !ParseInt(pidDir->d_name, &pid)) {
245             continue;
246         }
247         ProcessStats curStats;
248         std::string path = StringPrintf((mPath + kStatFileFormat).c_str(), pid);
249         auto ret = readPidStatFile(path, &curStats.process);
250         if (!ret.ok()) {
251             // PID may disappear between scanning the directory and parsing the stat file.
252             // Thus treat ERR_FILE_OPEN_READ errors as soft errors.
253             if (ret.error().code() != ERR_FILE_OPEN_READ) {
254                 return Error() << "Failed to read top-level per-process stat file: "
255                                << ret.error().message().c_str();
256             }
257             ALOGW("Failed to read top-level per-process stat file %s: %s", path.c_str(),
258                   ret.error().message().c_str());
259             continue;
260         }
261 
262         // 2. Read aggregated process status.
263         path = StringPrintf((mPath + kStatusFileFormat).c_str(), curStats.process.pid);
264         ret = readPidStatusFile(path, &curStats);
265         if (!ret.ok()) {
266             if (ret.error().code() != ERR_FILE_OPEN_READ) {
267                 return Error() << "Failed to read pid status for pid " << curStats.process.pid
268                                << ": " << ret.error().message().c_str();
269             }
270             ALOGW("Failed to read pid status for pid %" PRIu32 ": %s", curStats.process.pid,
271                   ret.error().message().c_str());
272         }
273 
274         // 3. When failed to read tgid or uid, copy these from the previous collection.
275         if (curStats.tgid == -1 || curStats.uid == -1) {
276             const auto& it = mLatestProcessStats.find(curStats.process.pid);
277             if (it != mLatestProcessStats.end() &&
278                 it->second.process.startTime == curStats.process.startTime) {
279                 curStats.tgid = it->second.tgid;
280                 curStats.uid = it->second.uid;
281             }
282         }
283 
284         if (curStats.tgid != -1 && curStats.tgid != curStats.process.pid) {
285             ALOGW("Skipping non-process (i.e., Tgid != PID) entry for PID %" PRIu32,
286                   curStats.process.pid);
287             continue;
288         }
289 
290         // 3. Fetch per-thread stats.
291         std::string taskDir = StringPrintf((mPath + kTaskDirFormat).c_str(), pid);
292         auto taskDirp = std::unique_ptr<DIR, int (*)(DIR*)>(opendir(taskDir.c_str()), closedir);
293         if (!taskDirp) {
294             // Treat this as a soft error so at least the process stats will be collected.
295             ALOGW("Failed to open %s directory", taskDir.c_str());
296         }
297         dirent* tidDir = nullptr;
298         bool didReadMainThread = false;
299         while (taskDirp != nullptr && (tidDir = readdir(taskDirp.get())) != nullptr) {
300             pid_t tid = 0;
301             if (tidDir->d_type != DT_DIR || !ParseInt(tidDir->d_name, &tid)) {
302                 continue;
303             }
304             if (processStats.find(tid) != processStats.end()) {
305                 return Error() << "Process stats already exists for TID " << tid
306                                << ". Stats will be double counted";
307             }
308 
309             PidStat curThreadStat = {};
310             path = StringPrintf((taskDir + kStatFileFormat).c_str(), tid);
311             const auto& ret = readPidStatFile(path, &curThreadStat);
312             if (!ret.ok()) {
313                 if (ret.error().code() != ERR_FILE_OPEN_READ) {
314                     return Error() << "Failed to read per-thread stat file: "
315                                    << ret.error().message().c_str();
316                 }
317                 // Maybe the thread terminated before reading the file so skip this thread and
318                 // continue with scanning the next thread's stat.
319                 ALOGW("Failed to read per-thread stat file %s: %s", path.c_str(),
320                       ret.error().message().c_str());
321                 continue;
322             }
323             if (curThreadStat.pid == curStats.process.pid) {
324                 didReadMainThread = true;
325             }
326             curStats.threads[curThreadStat.pid] = curThreadStat;
327         }
328         if (!didReadMainThread) {
329             // In the event of failure to read main-thread info (mostly because the process
330             // terminated during scanning/parsing), fill out the stat that are common between main
331             // thread and the process.
332             curStats.threads[curStats.process.pid] = PidStat{
333                     .pid = curStats.process.pid,
334                     .comm = curStats.process.comm,
335                     .state = curStats.process.state,
336                     .ppid = curStats.process.ppid,
337                     .numThreads = curStats.process.numThreads,
338                     .startTime = curStats.process.startTime,
339             };
340         }
341         processStats[curStats.process.pid] = curStats;
342     }
343     return processStats;
344 }
345 
346 }  // namespace watchdog
347 }  // namespace automotive
348 }  // namespace android
349