1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "platform_monitor.h"
16
17 #include <algorithm>
18 #include <map>
19 #include <memory>
20 #include <mutex>
21 #include <vector>
22
23 #include "hiview_global.h"
24 #include "logger.h"
25 #include "pipeline.h"
26 #include "sys_event_dao.h"
27 #include "sys_event.h"
28 #include "time_util.h"
29 #include "monitor_config.h"
30
31 namespace OHOS {
32 namespace HiviewDFX {
33 DEFINE_LOG_TAG("HiView-Monitor");
namespace {
// Argument handed to TimeUtil::Sleep between two recovery checks in Breaking().
constexpr uint8_t SLEEP_TEN_SECONDS = 10;
// Target name used when the monitor posts its own report events.
constexpr char EVENT_SERVICE_PLUGIN[] = "SysEventService";
} // namespace
AccumulateTimeInterval(int64_t costTime,std::map<int8_t,uint32_t> & stat)38 void PlatformMonitor::AccumulateTimeInterval(int64_t costTime, std::map<int8_t, uint32_t> &stat)
39 {
40 std::lock_guard<std::mutex> lock(statMutex_);
41 auto it = std::lower_bound(intervals_, intervals_ + sizeof(intervals_) / sizeof(intervals_[0]), costTime);
42 int index = it - intervals_;
43 stat[index] += 1;
44 }
45
CollectEvent(std::shared_ptr<PipelineEvent> event)46 void PlatformMonitor::CollectEvent(std::shared_ptr<PipelineEvent> event)
47 {
48 if (event == nullptr) {
49 return;
50 }
51 std::lock_guard<std::mutex> lock(topMutex_);
52 topDomains_[event->domain_]++;
53 topEvents_[event->eventName_]++;
54 }
55
CollectCostTime(PipelineEvent * event)56 void PlatformMonitor::CollectCostTime(PipelineEvent *event)
57 {
58 // collect data after event destory
59 if (event == nullptr) {
60 return;
61 }
62 onceTotalCnt_++;
63 onceTotalRealTime_ += event->realtime_;
64 onceTotalProcTime_ += event->processTime_;
65 onceTotalWaitTime_ += event->processTime_ - event->realtime_;
66 AccumulateTimeInterval(event->realtime_, realStat_);
67 AccumulateTimeInterval(event->processTime_, processStat_);
68 AccumulateTimeInterval((event->processTime_ - event->realtime_), waitTimeStat_);
69 if (event->realtime_ > realTimeBenchMark_) {
70 overRealTotalCount_++;
71 }
72 if (event->processTime_ > processTimeBenchMark_) {
73 overProcessTotalCount_++;
74 }
75 finishedCount_++;
76 HIVIEW_LOGD("onceTotalCnt_=%{public}u, onceTotalRealTime_=%{public}u, onceTotalProcTime_=%{public}u, "
77 "onceTotalWaitTime_=%{public}u, overRealTotalCount_=%{public}u, overProcessTotalCount_=%{public}u, "
78 "finishedCount_=%{public}u",
79 onceTotalCnt_, onceTotalRealTime_, onceTotalProcTime_,
80 onceTotalWaitTime_, overRealTotalCount_, overProcessTotalCount_,
81 finishedCount_);
82 }
83
CollectPerfProfiler()84 void PlatformMonitor::CollectPerfProfiler()
85 {
86 HIVIEW_LOGI("collect performance profiler");
87 // collect data every 5 minute
88 // collect event max size and max count
89 if (maxTotalCount_ < SysEvent::totalCount_) {
90 maxTotalCount_.store(SysEvent::totalCount_);
91 }
92 if (maxTotalSize_ < SysEvent::totalSize_) {
93 maxTotalSize_.store(SysEvent::totalSize_);
94 }
95 // total count, total size
96 totalCount_ = SysEvent::totalCount_;
97 totalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
98 // min speed, max speed
99 uint32_t onceTotalRealTime = onceTotalRealTime_;
100 uint32_t onceTotalProcTime = onceTotalProcTime_;
101 uint32_t onceTotalWaitTime = onceTotalWaitTime_;
102 uint32_t onceTotalCnt = onceTotalCnt_;
103 onceTotalRealTime_ = 0;
104 onceTotalProcTime_ = 0;
105 onceTotalWaitTime_ = 0;
106 onceTotalCnt_ = 0;
107 if (onceTotalRealTime > 0) {
108 curRealSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalRealTime;
109 if (minSpeed_ == 0 || (minSpeed_ > curRealSpeed_)) {
110 minSpeed_ = curRealSpeed_;
111 }
112 if (curRealSpeed_ > maxSpeed_) {
113 maxSpeed_ = curRealSpeed_;
114 }
115 } else {
116 minSpeed_ = 0;
117 maxSpeed_ = 0;
118 curRealSpeed_ = 0;
119 }
120 if (onceTotalProcTime > 0) {
121 curProcSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalProcTime;
122 } else {
123 curProcSpeed_ = 0;
124 }
125 if (onceTotalCnt > 0) {
126 avgRealTime_ = static_cast<double>(onceTotalRealTime) / onceTotalCnt;
127 avgProcessTime_ = static_cast<double>(onceTotalProcTime) / onceTotalCnt;
128 avgWaitTime_ = static_cast<double>(onceTotalWaitTime) / onceTotalCnt;
129 }
130 HIVIEW_LOGD("maxTotalCount_=%{public}u, maxTotalSize_=%{public}u, totalCount_=%{public}u, totalSize_=%{public}u, "
131 "onceTotalRealTime=%{public}u, onceTotalProcTime=%{public}u, onceTotalWaitTime=%{public}u, "
132 "onceTotalCnt=%{public}u, minSpeed_=%{public}u, maxSpeed_=%{public}u, "
133 "curRealSpeed_=%{public}u, curProcSpeed_=%{public}u, "
134 "avgRealTime_=%{public}f, avgProcessTime_=%{public}f, avgWaitTime_=%{public}f",
135 maxTotalCount_.load(), maxTotalSize_.load(), totalCount_, totalSize_,
136 onceTotalRealTime, onceTotalProcTime, onceTotalWaitTime,
137 onceTotalCnt, minSpeed_, maxSpeed_,
138 curRealSpeed_, curProcSpeed_,
139 avgRealTime_, avgProcessTime_, avgWaitTime_);
140 }
141
GetDomainsStat(PerfMeasure & perfMeasure)142 void PlatformMonitor::GetDomainsStat(PerfMeasure &perfMeasure)
143 {
144 std::lock_guard<std::mutex> lock(topMutex_);
145 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
146 perfMeasure.domains.emplace_back(it->first);
147 perfMeasure.domainCounts.emplace_back(it->second);
148 }
149 topDomains_.clear();
150 topEvents_.clear();
151 }
152
GetCostTimeInterval(PerfMeasure & perfMeasure)153 void PlatformMonitor::GetCostTimeInterval(PerfMeasure &perfMeasure)
154 {
155 std::lock_guard<std::mutex> lock(statMutex_);
156 for (int index = 0; index <= static_cast<int>(sizeof(intervals_) / sizeof(intervals_[0])); index++) {
157 uint32_t realCount = realStat_[index];
158 perfMeasure.realCounts.emplace_back(realCount);
159 uint32_t processCount = processStat_[index];
160 perfMeasure.processCounts.emplace_back(processCount);
161 uint32_t waitCount = waitTimeStat_[index];
162 perfMeasure.waitCounts.emplace_back(waitCount);
163 }
164 realStat_.clear();
165 processStat_.clear();
166 waitTimeStat_.clear();
167 }
168
CalcOverBenckMarkPct(PerfMeasure & perfMeasure)169 void PlatformMonitor::CalcOverBenckMarkPct(PerfMeasure &perfMeasure)
170 {
171 perfMeasure.finishedCount = finishedCount_;
172 perfMeasure.overRealTotalCount = overRealTotalCount_;
173 perfMeasure.overProcessTotalCount = overProcessTotalCount_;
174 finishedCount_ = 0;
175 overRealTotalCount_ = 0;
176 overProcessTotalCount_ = 0;
177
178 if (perfMeasure.finishedCount > 0) {
179 perfMeasure.realPercent = (PCT * perfMeasure.overRealTotalCount) / perfMeasure.finishedCount;
180 } else if (perfMeasure.overRealTotalCount > 0) {
181 perfMeasure.realPercent = PCT;
182 }
183
184 if (perfMeasure.finishedCount > 0) {
185 perfMeasure.processpercent = (PCT * perfMeasure.overProcessTotalCount) / perfMeasure.finishedCount;
186 } else if (perfMeasure.overProcessTotalCount > 0) {
187 perfMeasure.processpercent = PCT;
188 }
189 }
190
GetMaxTotalMeasure(PerfMeasure & perfMeasure)191 void PlatformMonitor::GetMaxTotalMeasure(PerfMeasure &perfMeasure)
192 {
193 perfMeasure.maxTotalCount = maxTotalCount_.load();
194 maxTotalCount_.store(0);
195
196 perfMeasure.maxTotalSize = maxTotalSize_.load();
197 maxTotalSize_.store(0);
198 }
199
GetBreakStat(PerfMeasure & perfMeasure)200 void PlatformMonitor::GetBreakStat(PerfMeasure &perfMeasure)
201 {
202 perfMeasure.totalCount = totalCount_;
203 totalCount_ = 0;
204
205 perfMeasure.totalSize = totalSize_;
206 totalSize_ = 0;
207
208 perfMeasure.breakCount = breakCount_;
209 breakCount_ = 0;
210
211 perfMeasure.breakDuration = breakDuration_;
212 breakDuration_ = 0;
213 }
214
GetMaxSpeed(PerfMeasure & perfMeasure) const215 void PlatformMonitor::GetMaxSpeed(PerfMeasure &perfMeasure) const
216 {
217 perfMeasure.minSpeed = minSpeed_;
218 perfMeasure.maxSpeed = maxSpeed_;
219 }
220
CreateProfileReport(PerfMeasure & perfMeasure)221 std::shared_ptr<SysEvent> PlatformMonitor::CreateProfileReport(PerfMeasure &perfMeasure)
222 {
223 SysEventCreator eventCreator("HIVIEWDFX", "PROFILE_STAT", SysEventCreator::STATISTIC);
224 eventCreator.SetKeyValue("MAX_TOTAL_COUNT", perfMeasure.maxTotalCount);
225 eventCreator.SetKeyValue("MAX_TOTAL_SIZE", perfMeasure.maxTotalSize);
226 eventCreator.SetKeyValue("DOMAINS", perfMeasure.domains);
227 eventCreator.SetKeyValue("DOMAIN_DETAIL", perfMeasure.domainCounts);
228 eventCreator.SetKeyValue("TOTAL_COUNT", perfMeasure.totalCount);
229 eventCreator.SetKeyValue("TOTAL_SIZE", perfMeasure.totalSize);
230 eventCreator.SetKeyValue("BREAK_COUNT", perfMeasure.breakCount);
231 eventCreator.SetKeyValue("BREAK_DURATION", perfMeasure.breakDuration);
232 eventCreator.SetKeyValue("MIN_SPEED", perfMeasure.minSpeed);
233 eventCreator.SetKeyValue("MAX_SPEED", perfMeasure.maxSpeed);
234 eventCreator.SetKeyValue("REAL_COUNT", perfMeasure.realCounts);
235 eventCreator.SetKeyValue("PROCESS_COUNT", perfMeasure.processCounts);
236 eventCreator.SetKeyValue("WAIT_COUNT", perfMeasure.waitCounts);
237 eventCreator.SetKeyValue("FINISHED_COUNT", perfMeasure.finishedCount);
238 eventCreator.SetKeyValue("OVER_REAL_COUNT", perfMeasure.overRealTotalCount);
239 eventCreator.SetKeyValue("OVER_REAL_PCT", perfMeasure.realPercent);
240 eventCreator.SetKeyValue("OVER_PROC_COUNT", perfMeasure.overProcessTotalCount);
241 eventCreator.SetKeyValue("OVER_PROC_PCT", perfMeasure.processpercent);
242 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
243 return sysEvent;
244 }
245
ReportCycleProfile()246 void PlatformMonitor::ReportCycleProfile()
247 {
248 HIVIEW_LOGI("report performance profile");
249 PerfMeasure perfMeasure;
250 // report max event size and count
251 GetMaxTotalMeasure(perfMeasure);
252
253 // report event number of each domain
254 GetDomainsStat(perfMeasure);
255
256 // report total number of event, time of break, duration of break
257 GetBreakStat(perfMeasure);
258
259 // report min speed, max speed
260 GetMaxSpeed(perfMeasure);
261
262 // report real time, process time, wait time of cost time interval
263 GetCostTimeInterval(perfMeasure);
264
265 // report percent and total number of over benchmark
266 CalcOverBenckMarkPct(perfMeasure);
267
268 std::shared_ptr<SysEvent> sysEvent = CreateProfileReport(perfMeasure);
269 if (sysEvent == nullptr) {
270 return;
271 }
272 HIVIEW_LOGI("report=%{public}s", sysEvent->AsJsonStr().c_str());
273 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
274 HIVIEW_LOGI("report performance profile have done");
275 }
276
GetTopDomains(std::vector<std::string> & domains,std::vector<uint32_t> & counts)277 void PlatformMonitor::GetTopDomains(std::vector<std::string> &domains, std::vector<uint32_t> &counts)
278 {
279 std::lock_guard<std::mutex> lock(topMutex_);
280 uint8_t topN = 3; // top n
281 if (topDomains_.size() <= topN) {
282 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
283 domains.emplace_back(it->first);
284 counts.emplace_back(it->second);
285 }
286 return;
287 }
288
289 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
290 counts.emplace_back(it->second);
291 }
292 std::sort(counts.begin(), counts.end(), std::greater<int>());
293 counts.resize(topN);
294 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
295 if (domains.size() >= topN) {
296 break;
297 }
298 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
299 domains.emplace_back(it->first);
300 }
301 }
302 return;
303 }
304
GetTopEvents(std::vector<std::string> & events,std::vector<uint32_t> & counts)305 void PlatformMonitor::GetTopEvents(std::vector<std::string> &events, std::vector<uint32_t> &counts)
306 {
307 std::lock_guard<std::mutex> lock(topMutex_);
308 uint8_t topN = 3; // top n
309 if (topEvents_.size() <= topN) {
310 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
311 events.emplace_back(it->first);
312 counts.emplace_back(it->second);
313 }
314 return;
315 }
316
317 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
318 counts.emplace_back(it->second);
319 }
320 std::sort(counts.begin(), counts.end(), std::greater<int>());
321 counts.resize(topN);
322 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
323 if (events.size() >= topN) {
324 break;
325 }
326 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
327 events.emplace_back(it->first);
328 }
329 }
330 return;
331 }
332
ReportBreakProfile()333 void PlatformMonitor::ReportBreakProfile()
334 {
335 // report current event size and count
336 uint32_t curTotalCount_ = SysEvent::totalCount_;
337 uint32_t curTotalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
338
339 // report current speed
340 uint32_t curRealSpeed = curRealSpeed_;
341 uint32_t curProcessSpeed = curProcSpeed_;
342
343 // report average real time, process time, wait time
344 double avgRealTime = avgRealTime_;
345 double avgProcessTime = avgProcessTime_;
346 double avgWaitTime = avgWaitTime_;
347
348 // report topk cost time event
349 std::vector<std::string> events;
350 std::vector<uint32_t> eventCounts;
351 GetTopEvents(events, eventCounts);
352
353 // report topk event and count
354 std::vector<std::string> domains;
355 std::vector<uint32_t> domainCounts;
356 GetTopDomains(domains, domainCounts);
357 SysEventCreator eventCreator("HIVIEWDFX", "BREAK", SysEventCreator::BEHAVIOR);
358 eventCreator.SetKeyValue("TOTAL_COUNT", curTotalCount_);
359 eventCreator.SetKeyValue("TOTAL_SIZE", curTotalSize_);
360 eventCreator.SetKeyValue("REAL_SPEED", curRealSpeed);
361 eventCreator.SetKeyValue("PROC_SPEED", curProcessSpeed);
362 eventCreator.SetKeyValue("AVG_REAL_TIME", avgRealTime);
363 eventCreator.SetKeyValue("AVG_PROC_TIME", avgProcessTime);
364 eventCreator.SetKeyValue("AVG_WAIT_TIME", avgWaitTime);
365 eventCreator.SetKeyValue("TOP_EVENT", events);
366 eventCreator.SetKeyValue("TOP_EVENT_COUNT", eventCounts);
367 eventCreator.SetKeyValue("TOP_DOMAIN", domains);
368 eventCreator.SetKeyValue("TOP_DOMAIN_COUNT", domainCounts);
369 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
370 HIVIEW_LOGI("report=%{public}s", sysEvent->AsJsonStr().c_str());
371 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
372 }
373
ReportRecoverProfile()374 void PlatformMonitor::ReportRecoverProfile()
375 {
376 // report break duration when recovery
377 int64_t duration = static_cast<int64_t>(recoverTimestamp_ - breakTimestamp_);
378 SysEventCreator eventCreator("HIVIEWDFX", "RECOVER", SysEventCreator::BEHAVIOR);
379 eventCreator.SetKeyValue("DURATION", duration);
380 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
381 HIVIEW_LOGI("report=%{public}s", sysEvent->AsJsonStr().c_str());
382 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
383 }
384
Breaking()385 void PlatformMonitor::Breaking()
386 {
387 // collect break count and duration every break
388 if (SysEvent::totalSize_ <= totalSizeBenchMark_) {
389 return;
390 }
391
392 HIVIEW_LOGE("break as event reach critical size %{public}u", SysEvent::totalSize_.load());
393 breakTimestamp_ = TimeUtil::GenerateTimestamp();
394 ReportBreakProfile();
395 int64_t recoveryBenchMark = static_cast<int64_t>(totalSizeBenchMark_ * 0.8); // 0.8 of total size will recover
396 while (true) {
397 if (SysEvent::totalSize_ <= recoveryBenchMark) {
398 break;
399 }
400 TimeUtil::Sleep(SLEEP_TEN_SECONDS);
401 }
402 breakCount_++;
403 recoverTimestamp_ = TimeUtil::GenerateTimestamp();
404 breakDuration_ += recoverTimestamp_ - breakTimestamp_;
405 HIVIEW_LOGW("recover after break duration %{public}ld", breakDuration_);
406 ReportRecoverProfile();
407 }
408
InitData()409 void PlatformMonitor::InitData()
410 {
411 MonitorConfig monitorConfig("/system/etc/hiview/monitor.cfg");
412 if (!monitorConfig.Parse()) {
413 return;
414 }
415
416 monitorConfig.ReadParam("collectPeriod", collectPeriod_);
417 monitorConfig.ReadParam("reportPeriod", reportPeriod_);
418 monitorConfig.ReadParam("totalSizeBenchMark", totalSizeBenchMark_);
419 monitorConfig.ReadParam("realTimeBenchMark", realTimeBenchMark_);
420 monitorConfig.ReadParam("processTimeBenchMark", processTimeBenchMark_);
421 }
422
StartMonitor(std::shared_ptr<EventLoop> looper)423 void PlatformMonitor::StartMonitor(std::shared_ptr<EventLoop> looper)
424 {
425 if (looper == nullptr) {
426 HIVIEW_LOGE("can not get share looper");
427 return;
428 }
429 InitData();
430
431 looper_ = looper;
432
433 auto collectTask = std::bind(&PlatformMonitor::CollectPerfProfiler, this);
434 looper_->AddTimerEvent(nullptr, nullptr, collectTask, collectPeriod_, true);
435 auto reportTask = std::bind(&PlatformMonitor::ReportCycleProfile, this);
436 looper_->AddTimerEvent(nullptr, nullptr, reportTask, reportPeriod_, true);
437 }
438 } // namespace HiviewDFX
439 } // namespace OHOS