1 /*
2 * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "platform_monitor.h"
16
17 #include <algorithm>
18 #include <cinttypes>
19 #include <map>
20 #include <memory>
21 #include <mutex>
22 #include <vector>
23
24 #include "hiview_global.h"
25 #include "logger.h"
26 #include "pipeline.h"
27 #include "sys_event_dao.h"
28 #include "sys_event.h"
29 #include "time_util.h"
30 #include "monitor_config.h"
31
32 namespace OHOS {
33 namespace HiviewDFX {
34 DEFINE_LOG_TAG("HiView-Monitor");
namespace {
// Argument handed to TimeUtil::Sleep between two backlog polls in Breaking()
// (ten seconds, going by the name).
constexpr uint8_t SLEEP_TEN_SECONDS = 10;
// Target name passed to PostSyncEventToTarget for every event this monitor reports.
constexpr char EVENT_SERVICE_PLUGIN[] = "SysEventService";
} // namespace
AccumulateTimeInterval(int64_t costTime,std::map<int8_t,uint32_t> & stat)39 void PlatformMonitor::AccumulateTimeInterval(int64_t costTime, std::map<int8_t, uint32_t> &stat)
40 {
41 std::lock_guard<std::mutex> lock(statMutex_);
42 auto it = std::lower_bound(intervals_, intervals_ + sizeof(intervals_) / sizeof(intervals_[0]), costTime);
43 int index = it - intervals_;
44 stat[index] += 1;
45 }
46
CollectEvent(std::shared_ptr<PipelineEvent> event)47 void PlatformMonitor::CollectEvent(std::shared_ptr<PipelineEvent> event)
48 {
49 if (event == nullptr) {
50 return;
51 }
52 std::lock_guard<std::mutex> lock(topMutex_);
53 topDomains_[event->domain_]++;
54 topEvents_[event->eventName_]++;
55 }
56
CollectCostTime(PipelineEvent * event)57 void PlatformMonitor::CollectCostTime(PipelineEvent *event)
58 {
59 // collect data after event destory
60 if (event == nullptr) {
61 return;
62 }
63 onceTotalCnt_++;
64 onceTotalRealTime_ += event->realtime_;
65 onceTotalProcTime_ += event->processTime_;
66 onceTotalWaitTime_ += event->processTime_ - event->realtime_;
67 AccumulateTimeInterval(event->realtime_, realStat_);
68 AccumulateTimeInterval(event->processTime_, processStat_);
69 AccumulateTimeInterval((event->processTime_ - event->realtime_), waitTimeStat_);
70 if (event->realtime_ > realTimeBenchMark_) {
71 overRealTotalCount_++;
72 }
73 if (event->processTime_ > processTimeBenchMark_) {
74 overProcessTotalCount_++;
75 }
76 finishedCount_++;
77 HIVIEW_LOGD("onceTotalCnt_=%{public}u, onceTotalRealTime_=%{public}u, onceTotalProcTime_=%{public}u, "
78 "onceTotalWaitTime_=%{public}u, overRealTotalCount_=%{public}u, overProcessTotalCount_=%{public}u, "
79 "finishedCount_=%{public}u",
80 onceTotalCnt_, onceTotalRealTime_, onceTotalProcTime_,
81 onceTotalWaitTime_, overRealTotalCount_, overProcessTotalCount_,
82 finishedCount_);
83 }
84
CollectPerfProfiler()85 void PlatformMonitor::CollectPerfProfiler()
86 {
87 HIVIEW_LOGI("collect performance profiler");
88 // collect data every 5 minute
89 // collect event max size and max count
90 if (maxTotalCount_ < SysEvent::totalCount_) {
91 maxTotalCount_.store(SysEvent::totalCount_);
92 }
93 if (maxTotalSize_ < SysEvent::totalSize_) {
94 maxTotalSize_.store(SysEvent::totalSize_);
95 }
96 // total count, total size
97 totalCount_ = SysEvent::totalCount_;
98 totalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
99 // min speed, max speed
100 uint32_t onceTotalRealTime = onceTotalRealTime_;
101 uint32_t onceTotalProcTime = onceTotalProcTime_;
102 uint32_t onceTotalWaitTime = onceTotalWaitTime_;
103 uint32_t onceTotalCnt = onceTotalCnt_;
104 onceTotalRealTime_ = 0;
105 onceTotalProcTime_ = 0;
106 onceTotalWaitTime_ = 0;
107 onceTotalCnt_ = 0;
108 if (onceTotalRealTime > 0) {
109 curRealSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalRealTime;
110 if (minSpeed_ == 0 || (minSpeed_ > curRealSpeed_)) {
111 minSpeed_ = curRealSpeed_;
112 }
113 if (curRealSpeed_ > maxSpeed_) {
114 maxSpeed_ = curRealSpeed_;
115 }
116 } else {
117 minSpeed_ = 0;
118 maxSpeed_ = 0;
119 curRealSpeed_ = 0;
120 }
121 if (onceTotalProcTime > 0) {
122 curProcSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalProcTime;
123 } else {
124 curProcSpeed_ = 0;
125 }
126 if (onceTotalCnt > 0) {
127 avgRealTime_ = static_cast<double>(onceTotalRealTime) / onceTotalCnt;
128 avgProcessTime_ = static_cast<double>(onceTotalProcTime) / onceTotalCnt;
129 avgWaitTime_ = static_cast<double>(onceTotalWaitTime) / onceTotalCnt;
130 }
131 HIVIEW_LOGD("maxTotalCount_=%{public}u, maxTotalSize_=%{public}u, totalCount_=%{public}u, totalSize_=%{public}u, "
132 "onceTotalRealTime=%{public}u, onceTotalProcTime=%{public}u, onceTotalWaitTime=%{public}u, "
133 "onceTotalCnt=%{public}u, minSpeed_=%{public}u, maxSpeed_=%{public}u, "
134 "curRealSpeed_=%{public}u, curProcSpeed_=%{public}u, "
135 "avgRealTime_=%{public}f, avgProcessTime_=%{public}f, avgWaitTime_=%{public}f",
136 maxTotalCount_.load(), maxTotalSize_.load(), totalCount_, totalSize_,
137 onceTotalRealTime, onceTotalProcTime, onceTotalWaitTime,
138 onceTotalCnt, minSpeed_, maxSpeed_,
139 curRealSpeed_, curProcSpeed_,
140 avgRealTime_, avgProcessTime_, avgWaitTime_);
141 }
142
GetDomainsStat(PerfMeasure & perfMeasure)143 void PlatformMonitor::GetDomainsStat(PerfMeasure &perfMeasure)
144 {
145 std::lock_guard<std::mutex> lock(topMutex_);
146 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
147 perfMeasure.domains.emplace_back(it->first);
148 perfMeasure.domainCounts.emplace_back(it->second);
149 }
150 topDomains_.clear();
151 topEvents_.clear();
152 }
153
GetCostTimeInterval(PerfMeasure & perfMeasure)154 void PlatformMonitor::GetCostTimeInterval(PerfMeasure &perfMeasure)
155 {
156 std::lock_guard<std::mutex> lock(statMutex_);
157 for (int index = 0; index <= static_cast<int>(sizeof(intervals_) / sizeof(intervals_[0])); index++) {
158 uint32_t realCount = realStat_[index];
159 perfMeasure.realCounts.emplace_back(realCount);
160 uint32_t processCount = processStat_[index];
161 perfMeasure.processCounts.emplace_back(processCount);
162 uint32_t waitCount = waitTimeStat_[index];
163 perfMeasure.waitCounts.emplace_back(waitCount);
164 }
165 realStat_.clear();
166 processStat_.clear();
167 waitTimeStat_.clear();
168 }
169
CalcOverBenckMarkPct(PerfMeasure & perfMeasure)170 void PlatformMonitor::CalcOverBenckMarkPct(PerfMeasure &perfMeasure)
171 {
172 perfMeasure.finishedCount = finishedCount_;
173 perfMeasure.overRealTotalCount = overRealTotalCount_;
174 perfMeasure.overProcessTotalCount = overProcessTotalCount_;
175 finishedCount_ = 0;
176 overRealTotalCount_ = 0;
177 overProcessTotalCount_ = 0;
178
179 if (perfMeasure.finishedCount > 0) {
180 perfMeasure.realPercent = (PCT * perfMeasure.overRealTotalCount) / perfMeasure.finishedCount;
181 } else if (perfMeasure.overRealTotalCount > 0) {
182 perfMeasure.realPercent = PCT;
183 }
184
185 if (perfMeasure.finishedCount > 0) {
186 perfMeasure.processpercent = (PCT * perfMeasure.overProcessTotalCount) / perfMeasure.finishedCount;
187 } else if (perfMeasure.overProcessTotalCount > 0) {
188 perfMeasure.processpercent = PCT;
189 }
190 }
191
GetMaxTotalMeasure(PerfMeasure & perfMeasure)192 void PlatformMonitor::GetMaxTotalMeasure(PerfMeasure &perfMeasure)
193 {
194 perfMeasure.maxTotalCount = maxTotalCount_.load();
195 maxTotalCount_.store(0);
196
197 perfMeasure.maxTotalSize = maxTotalSize_.load();
198 maxTotalSize_.store(0);
199 }
200
GetBreakStat(PerfMeasure & perfMeasure)201 void PlatformMonitor::GetBreakStat(PerfMeasure &perfMeasure)
202 {
203 perfMeasure.totalCount = totalCount_;
204 totalCount_ = 0;
205
206 perfMeasure.totalSize = totalSize_;
207 totalSize_ = 0;
208
209 perfMeasure.breakCount = breakCount_;
210 breakCount_ = 0;
211
212 perfMeasure.breakDuration = breakDuration_;
213 breakDuration_ = 0;
214 }
215
GetMaxSpeed(PerfMeasure & perfMeasure) const216 void PlatformMonitor::GetMaxSpeed(PerfMeasure &perfMeasure) const
217 {
218 perfMeasure.minSpeed = minSpeed_;
219 perfMeasure.maxSpeed = maxSpeed_;
220 }
221
CreateProfileReport(PerfMeasure & perfMeasure)222 std::shared_ptr<SysEvent> PlatformMonitor::CreateProfileReport(PerfMeasure &perfMeasure)
223 {
224 SysEventCreator eventCreator("HIVIEWDFX", "PROFILE_STAT", SysEventCreator::STATISTIC);
225 eventCreator.SetKeyValue("MAX_TOTAL_COUNT", perfMeasure.maxTotalCount);
226 eventCreator.SetKeyValue("MAX_TOTAL_SIZE", perfMeasure.maxTotalSize);
227 eventCreator.SetKeyValue("DOMAINS", perfMeasure.domains);
228 eventCreator.SetKeyValue("DOMAIN_DETAIL", perfMeasure.domainCounts);
229 eventCreator.SetKeyValue("TOTAL_COUNT", perfMeasure.totalCount);
230 eventCreator.SetKeyValue("TOTAL_SIZE", perfMeasure.totalSize);
231 eventCreator.SetKeyValue("BREAK_COUNT", perfMeasure.breakCount);
232 eventCreator.SetKeyValue("BREAK_DURATION", perfMeasure.breakDuration);
233 eventCreator.SetKeyValue("MIN_SPEED", perfMeasure.minSpeed);
234 eventCreator.SetKeyValue("MAX_SPEED", perfMeasure.maxSpeed);
235 eventCreator.SetKeyValue("REAL_COUNT", perfMeasure.realCounts);
236 eventCreator.SetKeyValue("PROCESS_COUNT", perfMeasure.processCounts);
237 eventCreator.SetKeyValue("WAIT_COUNT", perfMeasure.waitCounts);
238 eventCreator.SetKeyValue("FINISHED_COUNT", perfMeasure.finishedCount);
239 eventCreator.SetKeyValue("OVER_REAL_COUNT", perfMeasure.overRealTotalCount);
240 eventCreator.SetKeyValue("OVER_REAL_PCT", perfMeasure.realPercent);
241 eventCreator.SetKeyValue("OVER_PROC_COUNT", perfMeasure.overProcessTotalCount);
242 eventCreator.SetKeyValue("OVER_PROC_PCT", perfMeasure.processpercent);
243 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
244 return sysEvent;
245 }
246
ReportCycleProfile()247 void PlatformMonitor::ReportCycleProfile()
248 {
249 HIVIEW_LOGI("report performance profile");
250 PerfMeasure perfMeasure;
251 // report max event size and count
252 GetMaxTotalMeasure(perfMeasure);
253
254 // report event number of each domain
255 GetDomainsStat(perfMeasure);
256
257 // report total number of event, time of break, duration of break
258 GetBreakStat(perfMeasure);
259
260 // report min speed, max speed
261 GetMaxSpeed(perfMeasure);
262
263 // report real time, process time, wait time of cost time interval
264 GetCostTimeInterval(perfMeasure);
265
266 // report percent and total number of over benchmark
267 CalcOverBenckMarkPct(perfMeasure);
268
269 std::shared_ptr<SysEvent> sysEvent = CreateProfileReport(perfMeasure);
270 if (sysEvent == nullptr) {
271 return;
272 }
273 HIVIEW_LOGI("report event[%{public}s|%{public}s|%{public}" PRIu64 "].", sysEvent->domain_.c_str(),
274 sysEvent->eventName_.c_str(), sysEvent->GetEventUintValue("time_"));
275 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
276 HIVIEW_LOGI("report performance profile have done");
277 }
278
GetTopDomains(std::vector<std::string> & domains,std::vector<uint32_t> & counts)279 void PlatformMonitor::GetTopDomains(std::vector<std::string> &domains, std::vector<uint32_t> &counts)
280 {
281 std::lock_guard<std::mutex> lock(topMutex_);
282 uint8_t topN = 3; // top n
283 if (topDomains_.size() <= topN) {
284 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
285 domains.emplace_back(it->first);
286 counts.emplace_back(it->second);
287 }
288 return;
289 }
290
291 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
292 counts.emplace_back(it->second);
293 }
294 std::sort(counts.begin(), counts.end(), std::greater<int>());
295 counts.resize(topN);
296 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
297 if (domains.size() >= topN) {
298 break;
299 }
300 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
301 domains.emplace_back(it->first);
302 }
303 }
304 return;
305 }
306
GetTopEvents(std::vector<std::string> & events,std::vector<uint32_t> & counts)307 void PlatformMonitor::GetTopEvents(std::vector<std::string> &events, std::vector<uint32_t> &counts)
308 {
309 std::lock_guard<std::mutex> lock(topMutex_);
310 uint8_t topN = 3; // top n
311 if (topEvents_.size() <= topN) {
312 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
313 events.emplace_back(it->first);
314 counts.emplace_back(it->second);
315 }
316 return;
317 }
318
319 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
320 counts.emplace_back(it->second);
321 }
322 std::sort(counts.begin(), counts.end(), std::greater<int>());
323 counts.resize(topN);
324 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
325 if (events.size() >= topN) {
326 break;
327 }
328 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
329 events.emplace_back(it->first);
330 }
331 }
332 return;
333 }
334
ReportBreakProfile()335 void PlatformMonitor::ReportBreakProfile()
336 {
337 // report current event size and count
338 uint32_t curTotalCount_ = SysEvent::totalCount_;
339 uint32_t curTotalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
340
341 // report current speed
342 uint32_t curRealSpeed = curRealSpeed_;
343 uint32_t curProcessSpeed = curProcSpeed_;
344
345 // report average real time, process time, wait time
346 double avgRealTime = avgRealTime_;
347 double avgProcessTime = avgProcessTime_;
348 double avgWaitTime = avgWaitTime_;
349
350 // report topk cost time event
351 std::vector<std::string> events;
352 std::vector<uint32_t> eventCounts;
353 GetTopEvents(events, eventCounts);
354
355 // report topk event and count
356 std::vector<std::string> domains;
357 std::vector<uint32_t> domainCounts;
358 GetTopDomains(domains, domainCounts);
359 SysEventCreator eventCreator("HIVIEWDFX", "BREAK", SysEventCreator::BEHAVIOR);
360 eventCreator.SetKeyValue("TOTAL_COUNT", curTotalCount_);
361 eventCreator.SetKeyValue("TOTAL_SIZE", curTotalSize_);
362 eventCreator.SetKeyValue("REAL_SPEED", curRealSpeed);
363 eventCreator.SetKeyValue("PROC_SPEED", curProcessSpeed);
364 eventCreator.SetKeyValue("AVG_REAL_TIME", avgRealTime);
365 eventCreator.SetKeyValue("AVG_PROC_TIME", avgProcessTime);
366 eventCreator.SetKeyValue("AVG_WAIT_TIME", avgWaitTime);
367 eventCreator.SetKeyValue("TOP_EVENT", events);
368 eventCreator.SetKeyValue("TOP_EVENT_COUNT", eventCounts);
369 eventCreator.SetKeyValue("TOP_DOMAIN", domains);
370 eventCreator.SetKeyValue("TOP_DOMAIN_COUNT", domainCounts);
371 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
372 HIVIEW_LOGI("report event[%{public}s|%{public}s|%{public}" PRIu64 "].", sysEvent->domain_.c_str(),
373 sysEvent->eventName_.c_str(), sysEvent->GetEventUintValue("time_"));
374 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
375 }
376
ReportRecoverProfile()377 void PlatformMonitor::ReportRecoverProfile()
378 {
379 // report break duration when recovery
380 int64_t duration = static_cast<int64_t>(recoverTimestamp_ - breakTimestamp_);
381 SysEventCreator eventCreator("HIVIEWDFX", "RECOVER", SysEventCreator::BEHAVIOR);
382 eventCreator.SetKeyValue("DURATION", duration);
383 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
384 HIVIEW_LOGI("report event[%{public}s|%{public}s|%{public}" PRIu64 "].", sysEvent->domain_.c_str(),
385 sysEvent->eventName_.c_str(), sysEvent->GetEventUintValue("time_"));
386 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
387 }
388
Breaking()389 void PlatformMonitor::Breaking()
390 {
391 // collect break count and duration every break
392 if (SysEvent::totalSize_ <= totalSizeBenchMark_) {
393 return;
394 }
395
396 HIVIEW_LOGE("break as event reach critical size %{public}" PRId64, SysEvent::totalSize_.load());
397 breakTimestamp_ = TimeUtil::GenerateTimestamp();
398 ReportBreakProfile();
399 int64_t recoveryBenchMark = static_cast<int64_t>(totalSizeBenchMark_ * 0.8); // 0.8 of total size will recover
400 while (true) {
401 if (SysEvent::totalSize_ <= recoveryBenchMark) {
402 break;
403 }
404 TimeUtil::Sleep(SLEEP_TEN_SECONDS);
405 }
406 breakCount_++;
407 recoverTimestamp_ = TimeUtil::GenerateTimestamp();
408 breakDuration_ += recoverTimestamp_ - breakTimestamp_;
409 HIVIEW_LOGW("recover after break duration %{public}" PRIu64, breakDuration_);
410 ReportRecoverProfile();
411 }
412
InitData()413 void PlatformMonitor::InitData()
414 {
415 MonitorConfig monitorConfig("/system/etc/hiview/monitor.cfg");
416 if (!monitorConfig.Parse()) {
417 return;
418 }
419
420 monitorConfig.ReadParam("collectPeriod", collectPeriod_);
421 monitorConfig.ReadParam("reportPeriod", reportPeriod_);
422 monitorConfig.ReadParam("totalSizeBenchMark", totalSizeBenchMark_);
423 monitorConfig.ReadParam("realTimeBenchMark", realTimeBenchMark_);
424 monitorConfig.ReadParam("processTimeBenchMark", processTimeBenchMark_);
425 }
426
StartMonitor(std::shared_ptr<EventLoop> looper)427 void PlatformMonitor::StartMonitor(std::shared_ptr<EventLoop> looper)
428 {
429 if (looper == nullptr) {
430 HIVIEW_LOGE("can not get share looper");
431 return;
432 }
433 InitData();
434
435 looper_ = looper;
436
437 auto collectTask = std::bind(&PlatformMonitor::CollectPerfProfiler, this);
438 looper_->AddTimerEvent(nullptr, nullptr, collectTask, collectPeriod_, true);
439 auto reportTask = std::bind(&PlatformMonitor::ReportCycleProfile, this);
440 looper_->AddTimerEvent(nullptr, nullptr, reportTask, reportPeriod_, true);
441 }
442 } // namespace HiviewDFX
443 } // namespace OHOS