1 /*
2 * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "platform_monitor.h"
16
17 #include <algorithm>
18 #include <cinttypes>
19 #include <map>
20 #include <memory>
21 #include <mutex>
22 #include <vector>
23
24 #include "hisysevent.h"
25 #include "hiview_global.h"
26 #include "hiview_logger.h"
27 #include "pipeline.h"
28 #include "sys_event_dao.h"
29 #include "sys_event.h"
30 #include "time_util.h"
31 #include "monitor_config.h"
32
33 namespace OHOS {
34 namespace HiviewDFX {
35 DEFINE_LOG_TAG("HiView-Monitor");
namespace {
// Value handed to TimeUtil::Sleep() between two backlog checks in Breaking().
// NOTE(review): the name implies a ten-second pause; confirm the unit TimeUtil::Sleep() expects.
constexpr uint8_t SLEEP_TEN_SECONDS = 10;
} // namespace
39
AccumulateTimeInterval(uint64_t costTime,std::map<int8_t,uint32_t> & stat)40 void PlatformMonitor::AccumulateTimeInterval(uint64_t costTime, std::map<int8_t, uint32_t> &stat)
41 {
42 std::lock_guard<std::mutex> lock(statMutex_);
43 auto lastPos = std::end(intervals_);
44 auto it = std::lower_bound(intervals_, lastPos, costTime);
45 if (it == lastPos) {
46 HIVIEW_LOGD("lower bound base on %{public}" PRIu64 " not found", costTime);
47 return;
48 }
49 int index = it - intervals_;
50 stat[index] += 1;
51 }
52
CollectEvent(std::shared_ptr<PipelineEvent> event)53 void PlatformMonitor::CollectEvent(std::shared_ptr<PipelineEvent> event)
54 {
55 if (event == nullptr) {
56 return;
57 }
58 std::lock_guard<std::mutex> lock(topMutex_);
59 topDomains_[event->domain_]++;
60 topEvents_[event->eventName_]++;
61 }
62
CollectCostTime(PipelineEvent * event)63 void PlatformMonitor::CollectCostTime(PipelineEvent *event)
64 {
65 // collect data after event destory
66 if (event == nullptr) {
67 return;
68 }
69 onceTotalCnt_++;
70 onceTotalRealTime_ += event->realtime_;
71 onceTotalProcTime_ += event->processTime_;
72 uint64_t waitTime = event->processTime_ > event->realtime_ ? (event->processTime_ - event->realtime_) : 0;
73 onceTotalWaitTime_ += waitTime;
74 AccumulateTimeInterval(event->realtime_, realStat_);
75 AccumulateTimeInterval(event->processTime_, processStat_);
76 AccumulateTimeInterval(waitTime, waitTimeStat_);
77 if (event->realtime_ > realTimeBenchMark_) {
78 overRealTotalCount_++;
79 }
80 if (event->processTime_ > processTimeBenchMark_) {
81 overProcessTotalCount_++;
82 }
83 finishedCount_++;
84 HIVIEW_LOGD("onceTotalCnt_=%{public}u, onceTotalRealTime_=%{public}u, onceTotalProcTime_=%{public}u, "
85 "onceTotalWaitTime_=%{public}u, overRealTotalCount_=%{public}u, overProcessTotalCount_=%{public}u, "
86 "finishedCount_=%{public}u",
87 onceTotalCnt_, onceTotalRealTime_, onceTotalProcTime_,
88 onceTotalWaitTime_, overRealTotalCount_, overProcessTotalCount_,
89 finishedCount_);
90 }
91
CollectPerfProfiler()92 void PlatformMonitor::CollectPerfProfiler()
93 {
94 HIVIEW_LOGI("collect performance profiler");
95 // collect data every 5 minute
96 // collect event max size and max count
97 if (maxTotalCount_ < SysEvent::totalCount_) {
98 maxTotalCount_.store(SysEvent::totalCount_);
99 }
100 if (maxTotalSize_ < SysEvent::totalSize_) {
101 maxTotalSize_.store(SysEvent::totalSize_);
102 }
103 // total count, total size
104 totalCount_ = SysEvent::totalCount_;
105 totalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
106 // min speed, max speed
107 uint32_t onceTotalRealTime = onceTotalRealTime_;
108 uint32_t onceTotalProcTime = onceTotalProcTime_;
109 uint32_t onceTotalWaitTime = onceTotalWaitTime_;
110 uint32_t onceTotalCnt = onceTotalCnt_;
111 onceTotalRealTime_ = 0;
112 onceTotalProcTime_ = 0;
113 onceTotalWaitTime_ = 0;
114 onceTotalCnt_ = 0;
115 if (onceTotalRealTime > 0) {
116 curRealSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalRealTime;
117 if (minSpeed_ == 0 || (minSpeed_ > curRealSpeed_)) {
118 minSpeed_ = curRealSpeed_;
119 }
120 if (curRealSpeed_ > maxSpeed_) {
121 maxSpeed_ = curRealSpeed_;
122 }
123 } else {
124 minSpeed_ = 0;
125 maxSpeed_ = 0;
126 curRealSpeed_ = 0;
127 }
128 if (onceTotalProcTime > 0) {
129 curProcSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalProcTime;
130 } else {
131 curProcSpeed_ = 0;
132 }
133 if (onceTotalCnt > 0) {
134 avgRealTime_ = static_cast<double>(onceTotalRealTime) / onceTotalCnt;
135 avgProcessTime_ = static_cast<double>(onceTotalProcTime) / onceTotalCnt;
136 avgWaitTime_ = static_cast<double>(onceTotalWaitTime) / onceTotalCnt;
137 }
138 HIVIEW_LOGD("maxTotalCount_=%{public}u, maxTotalSize_=%{public}u, totalCount_=%{public}u, totalSize_=%{public}u, "
139 "onceTotalRealTime=%{public}u, onceTotalProcTime=%{public}u, onceTotalWaitTime=%{public}u, "
140 "onceTotalCnt=%{public}u, minSpeed_=%{public}u, maxSpeed_=%{public}u, "
141 "curRealSpeed_=%{public}u, curProcSpeed_=%{public}u, "
142 "avgRealTime_=%{public}f, avgProcessTime_=%{public}f, avgWaitTime_=%{public}f",
143 maxTotalCount_.load(), maxTotalSize_.load(), totalCount_, totalSize_,
144 onceTotalRealTime, onceTotalProcTime, onceTotalWaitTime,
145 onceTotalCnt, minSpeed_, maxSpeed_,
146 curRealSpeed_, curProcSpeed_,
147 avgRealTime_, avgProcessTime_, avgWaitTime_);
148 }
149
GetDomainsStat(PerfMeasure & perfMeasure)150 void PlatformMonitor::GetDomainsStat(PerfMeasure &perfMeasure)
151 {
152 std::lock_guard<std::mutex> lock(topMutex_);
153 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
154 perfMeasure.domains.emplace_back(it->first);
155 perfMeasure.domainCounts.emplace_back(it->second);
156 }
157 topDomains_.clear();
158 topEvents_.clear();
159 }
160
GetCostTimeInterval(PerfMeasure & perfMeasure)161 void PlatformMonitor::GetCostTimeInterval(PerfMeasure &perfMeasure)
162 {
163 std::lock_guard<std::mutex> lock(statMutex_);
164 for (int index = 0; index <= static_cast<int>(sizeof(intervals_) / sizeof(intervals_[0])); index++) {
165 uint32_t realCount = realStat_[index];
166 perfMeasure.realCounts.emplace_back(realCount);
167 uint32_t processCount = processStat_[index];
168 perfMeasure.processCounts.emplace_back(processCount);
169 uint32_t waitCount = waitTimeStat_[index];
170 perfMeasure.waitCounts.emplace_back(waitCount);
171 }
172 realStat_.clear();
173 processStat_.clear();
174 waitTimeStat_.clear();
175 }
176
CalcOverBenckMarkPct(PerfMeasure & perfMeasure)177 void PlatformMonitor::CalcOverBenckMarkPct(PerfMeasure &perfMeasure)
178 {
179 perfMeasure.finishedCount = finishedCount_;
180 perfMeasure.overRealTotalCount = overRealTotalCount_;
181 perfMeasure.overProcessTotalCount = overProcessTotalCount_;
182 finishedCount_ = 0;
183 overRealTotalCount_ = 0;
184 overProcessTotalCount_ = 0;
185
186 if (perfMeasure.finishedCount > 0) {
187 perfMeasure.realPercent = (PCT * perfMeasure.overRealTotalCount) / perfMeasure.finishedCount;
188 } else if (perfMeasure.overRealTotalCount > 0) {
189 perfMeasure.realPercent = PCT;
190 }
191
192 if (perfMeasure.finishedCount > 0) {
193 perfMeasure.processpercent = (PCT * perfMeasure.overProcessTotalCount) / perfMeasure.finishedCount;
194 } else if (perfMeasure.overProcessTotalCount > 0) {
195 perfMeasure.processpercent = PCT;
196 }
197 }
198
GetMaxTotalMeasure(PerfMeasure & perfMeasure)199 void PlatformMonitor::GetMaxTotalMeasure(PerfMeasure &perfMeasure)
200 {
201 perfMeasure.maxTotalCount = maxTotalCount_.load();
202 maxTotalCount_.store(0);
203
204 perfMeasure.maxTotalSize = maxTotalSize_.load();
205 maxTotalSize_.store(0);
206 }
207
GetBreakStat(PerfMeasure & perfMeasure)208 void PlatformMonitor::GetBreakStat(PerfMeasure &perfMeasure)
209 {
210 perfMeasure.totalCount = totalCount_;
211 totalCount_ = 0;
212
213 perfMeasure.totalSize = totalSize_;
214 totalSize_ = 0;
215
216 perfMeasure.breakCount = breakCount_;
217 breakCount_ = 0;
218
219 perfMeasure.breakDuration = breakDuration_;
220 breakDuration_ = 0;
221 }
222
GetMaxSpeed(PerfMeasure & perfMeasure) const223 void PlatformMonitor::GetMaxSpeed(PerfMeasure &perfMeasure) const
224 {
225 perfMeasure.minSpeed = minSpeed_;
226 perfMeasure.maxSpeed = maxSpeed_;
227 }
228
ReportProfile(const PerfMeasure & perfMeasure)229 void PlatformMonitor::ReportProfile(const PerfMeasure& perfMeasure)
230 {
231 int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "PROFILE_STAT", HiSysEvent::EventType::STATISTIC,
232 "MAX_TOTAL_COUNT", perfMeasure.maxTotalCount, "MAX_TOTAL_SIZE", perfMeasure.maxTotalSize,
233 "DOMAINS", perfMeasure.domains, "DOMAIN_DETAIL", perfMeasure.domainCounts,
234 "TOTAL_COUNT", perfMeasure.totalCount, "TOTAL_SIZE", perfMeasure.totalSize,
235 "BREAK_COUNT", perfMeasure.breakCount, "BREAK_DURATION", perfMeasure.breakDuration,
236 "MIN_SPEED", perfMeasure.minSpeed, "MAX_SPEED", perfMeasure.maxSpeed, "REAL_COUNT", perfMeasure.realCounts,
237 "PROCESS_COUNT", perfMeasure.processCounts, "WAIT_COUNT", perfMeasure.waitCounts,
238 "FINISHED_COUNT", perfMeasure.finishedCount, "OVER_REAL_COUNT", perfMeasure.overRealTotalCount,
239 "OVER_REAL_PCT", perfMeasure.realPercent, "OVER_PROC_COUNT", perfMeasure.overProcessTotalCount,
240 "OVER_PROC_PCT", perfMeasure.processpercent);
241 if (ret != SUCCESS) {
242 HIVIEW_LOGE("failed to write PROFILE_STAT event, ret is %{public}d", ret);
243 }
244 }
245
ReportCycleProfile()246 void PlatformMonitor::ReportCycleProfile()
247 {
248 HIVIEW_LOGI("report performance profile");
249 PerfMeasure perfMeasure;
250 // report max event size and count
251 GetMaxTotalMeasure(perfMeasure);
252
253 // report event number of each domain
254 GetDomainsStat(perfMeasure);
255
256 // report total number of event, time of break, duration of break
257 GetBreakStat(perfMeasure);
258
259 // report min speed, max speed
260 GetMaxSpeed(perfMeasure);
261
262 // report real time, process time, wait time of cost time interval
263 GetCostTimeInterval(perfMeasure);
264
265 // report percent and total number of over benchmark
266 CalcOverBenckMarkPct(perfMeasure);
267
268 ReportProfile(perfMeasure);
269 HIVIEW_LOGI("report performance profile have done");
270 }
271
GetTopDomains(std::vector<std::string> & domains,std::vector<uint32_t> & counts)272 void PlatformMonitor::GetTopDomains(std::vector<std::string> &domains, std::vector<uint32_t> &counts)
273 {
274 std::lock_guard<std::mutex> lock(topMutex_);
275 uint8_t topN = 3; // top n
276 if (topDomains_.size() <= topN) {
277 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
278 domains.emplace_back(it->first);
279 counts.emplace_back(it->second);
280 }
281 return;
282 }
283
284 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
285 counts.emplace_back(it->second);
286 }
287 std::sort(counts.begin(), counts.end(), std::greater<int>());
288 counts.resize(topN);
289 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
290 if (domains.size() >= topN) {
291 break;
292 }
293 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
294 domains.emplace_back(it->first);
295 }
296 }
297 return;
298 }
299
GetTopEvents(std::vector<std::string> & events,std::vector<uint32_t> & counts)300 void PlatformMonitor::GetTopEvents(std::vector<std::string> &events, std::vector<uint32_t> &counts)
301 {
302 std::lock_guard<std::mutex> lock(topMutex_);
303 uint8_t topN = 3; // top n
304 if (topEvents_.size() <= topN) {
305 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
306 events.emplace_back(it->first);
307 counts.emplace_back(it->second);
308 }
309 return;
310 }
311
312 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
313 counts.emplace_back(it->second);
314 }
315 std::sort(counts.begin(), counts.end(), std::greater<int>());
316 counts.resize(topN);
317 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
318 if (events.size() >= topN) {
319 break;
320 }
321 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
322 events.emplace_back(it->first);
323 }
324 }
325 return;
326 }
327
ReportBreakProfile()328 void PlatformMonitor::ReportBreakProfile()
329 {
330 // report current event size and count
331 uint32_t curTotalCount_ = SysEvent::totalCount_;
332 uint32_t curTotalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
333
334 // report current speed
335 uint32_t curRealSpeed = curRealSpeed_;
336 uint32_t curProcessSpeed = curProcSpeed_;
337
338 // report average real time, process time, wait time
339 double avgRealTime = avgRealTime_;
340 double avgProcessTime = avgProcessTime_;
341 double avgWaitTime = avgWaitTime_;
342
343 // report topk cost time event
344 std::vector<std::string> events;
345 std::vector<uint32_t> eventCounts;
346 GetTopEvents(events, eventCounts);
347
348 // report topk event and count
349 std::vector<std::string> domains;
350 std::vector<uint32_t> domainCounts;
351 GetTopDomains(domains, domainCounts);
352 int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "BREAK", HiSysEvent::EventType::BEHAVIOR,
353 "TOTAL_COUNT", curTotalCount_, "TOTAL_SIZE", curTotalSize_, "REAL_SPEED", curRealSpeed,
354 "PROC_SPEED", curProcessSpeed, "AVG_REAL_TIME", avgRealTime, "AVG_PROC_TIME", avgProcessTime,
355 "AVG_WAIT_TIME", avgWaitTime, "TOP_EVENT", events, "TOP_EVENT_COUNT", eventCounts, "TOP_DOMAIN", domains,
356 "TOP_DOMAIN_COUNT", domainCounts);
357 if (ret != SUCCESS) {
358 HIVIEW_LOGE("failed to write BREAK event, ret is %{public}d", ret);
359 }
360 }
361
ReportRecoverProfile()362 void PlatformMonitor::ReportRecoverProfile()
363 {
364 // report break duration when recovery
365 int64_t duration = static_cast<int64_t>(recoverTimestamp_ - breakTimestamp_);
366 int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "RECOVER", HiSysEvent::EventType::BEHAVIOR,
367 "DURATION", duration);
368 if (ret != SUCCESS) {
369 HIVIEW_LOGE("failed to write RECOVER event, ret is %{public}d", ret);
370 }
371 }
372
Breaking()373 void PlatformMonitor::Breaking()
374 {
375 // collect break count and duration every break
376 if (SysEvent::totalSize_ <= totalSizeBenchMark_) {
377 return;
378 }
379
380 HIVIEW_LOGE("break as event reach critical size %{public}" PRId64, SysEvent::totalSize_.load());
381 breakTimestamp_ = TimeUtil::GenerateTimestamp();
382 ReportBreakProfile();
383 int64_t recoveryBenchMark = static_cast<int64_t>(totalSizeBenchMark_ * 0.8); // 0.8 of total size will recover
384 while (true) {
385 if (SysEvent::totalSize_ <= recoveryBenchMark) {
386 break;
387 }
388 TimeUtil::Sleep(SLEEP_TEN_SECONDS);
389 }
390 breakCount_++;
391 recoverTimestamp_ = TimeUtil::GenerateTimestamp();
392 breakDuration_ += recoverTimestamp_ - breakTimestamp_;
393 HIVIEW_LOGW("recover after break duration %{public}" PRIu64, breakDuration_);
394 ReportRecoverProfile();
395 }
396
InitData()397 void PlatformMonitor::InitData()
398 {
399 MonitorConfig monitorConfig("/system/etc/hiview/monitor.cfg");
400 if (!monitorConfig.Parse()) {
401 return;
402 }
403
404 monitorConfig.ReadParam("collectPeriod", collectPeriod_);
405 monitorConfig.ReadParam("reportPeriod", reportPeriod_);
406 monitorConfig.ReadParam("totalSizeBenchMark", totalSizeBenchMark_);
407 monitorConfig.ReadParam("realTimeBenchMark", realTimeBenchMark_);
408 monitorConfig.ReadParam("processTimeBenchMark", processTimeBenchMark_);
409 }
410
StartMonitor(std::shared_ptr<EventLoop> looper)411 void PlatformMonitor::StartMonitor(std::shared_ptr<EventLoop> looper)
412 {
413 if (looper == nullptr) {
414 HIVIEW_LOGE("can not get share looper");
415 return;
416 }
417 InitData();
418
419 looper_ = looper;
420
421 auto collectTask = std::bind(&PlatformMonitor::CollectPerfProfiler, this);
422 looper_->AddTimerEvent(nullptr, nullptr, collectTask, collectPeriod_, true);
423 auto reportTask = std::bind(&PlatformMonitor::ReportCycleProfile, this);
424 looper_->AddTimerEvent(nullptr, nullptr, reportTask, reportPeriod_, true);
425 }
426 } // namespace HiviewDFX
427 } // namespace OHOS
428