1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "platform_monitor.h"
16
17 #include <algorithm>
18 #include <map>
19 #include <memory>
20 #include <mutex>
21 #include <vector>
22
23 #include "hiview_global.h"
24 #include "logger.h"
25 #include "pipeline.h"
26 #include "sys_event_dao.h"
27 #include "sys_event.h"
28 #include "time_util.h"
29 #include "monitor_config.h"
30
31 namespace OHOS {
32 namespace HiviewDFX {
33 DEFINE_LOG_TAG("HiView-Monitor");
namespace {
// Argument handed to TimeUtil::Sleep between two size checks while the monitor waits for
// recovery (presumably a number of seconds, going by the name -- confirm against TimeUtil).
constexpr uint8_t SLEEP_TEN_SECONDS = 10;
// Target name used when posting the monitor's own report events.
constexpr char EVENT_SERVICE_PLUGIN[] = "SysEventService";
} // namespace
AccumulateTimeInterval(int64_t costTime,std::map<int8_t,uint32_t> & stat)38 void PlatformMonitor::AccumulateTimeInterval(int64_t costTime, std::map<int8_t, uint32_t> &stat)
39 {
40 std::lock_guard<std::mutex> lock(statMutex_);
41 auto it = std::lower_bound(intervals_, intervals_ + sizeof(intervals_) / sizeof(intervals_[0]), costTime);
42 int index = it - intervals_;
43 stat[index] += 1;
44 }
45
CollectEvent(std::shared_ptr<PipelineEvent> event)46 void PlatformMonitor::CollectEvent(std::shared_ptr<PipelineEvent> event)
47 {
48 if (event == nullptr) {
49 return;
50 }
51 std::lock_guard<std::mutex> lock(topMutex_);
52 topDomains_[event->domain_]++;
53 topEvents_[event->eventName_]++;
54 }
55
CollectCostTime(PipelineEvent * event)56 void PlatformMonitor::CollectCostTime(PipelineEvent *event)
57 {
58 // collect data after event destory
59 if (event == nullptr) {
60 return;
61 }
62 onceTotalCnt_++;
63 onceTotalRealTime_ += event->realtime_;
64 onceTotalProcTime_ += event->processTime_;
65 onceTotalWaitTime_ += event->processTime_ - event->realtime_;
66 AccumulateTimeInterval(event->realtime_, realStat_);
67 AccumulateTimeInterval(event->processTime_, processStat_);
68 AccumulateTimeInterval((event->processTime_ - event->realtime_), waitTimeStat_);
69 if (event->realtime_ > realTimeBenchMark_) {
70 overRealTotalCount_++;
71 }
72 if (event->processTime_ > processTimeBenchMark_) {
73 overProcessTotalCount_++;
74 }
75 finishedCount_++;
76 HIVIEW_LOGD("onceTotalCnt_=%{public}u, onceTotalRealTime_=%{public}u, onceTotalProcTime_=%{public}u, "
77 "onceTotalWaitTime_=%{public}u, overRealTotalCount_=%{public}u, overProcessTotalCount_=%{public}u, "
78 "finishedCount_=%{public}u",
79 onceTotalCnt_, onceTotalRealTime_, onceTotalProcTime_,
80 onceTotalWaitTime_, overRealTotalCount_, overProcessTotalCount_,
81 finishedCount_);
82 }
83
CollectPerfProfiler()84 void PlatformMonitor::CollectPerfProfiler()
85 {
86 HIVIEW_LOGI("collect performance profiler");
87 // collect data every 5 minute
88 // collect event max size and max count
89 if (maxTotalCount_ < SysEvent::totalCount_) {
90 maxTotalCount_.store(SysEvent::totalCount_);
91 }
92 if (maxTotalSize_ < SysEvent::totalSize_) {
93 maxTotalSize_.store(SysEvent::totalSize_);
94 }
95 // total count, total size
96 totalCount_ = SysEvent::totalCount_;
97 totalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
98 // min speed, max speed
99 uint32_t onceTotalRealTime = onceTotalRealTime_;
100 uint32_t onceTotalProcTime = onceTotalProcTime_;
101 uint32_t onceTotalWaitTime = onceTotalWaitTime_;
102 uint32_t onceTotalCnt = onceTotalCnt_;
103 onceTotalRealTime_ = 0;
104 onceTotalProcTime_ = 0;
105 onceTotalWaitTime_ = 0;
106 onceTotalCnt_ = 0;
107 if (onceTotalRealTime > 0) {
108 curRealSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalRealTime;
109 if (minSpeed_ == 0 || (minSpeed_ > curRealSpeed_)) {
110 minSpeed_ = curRealSpeed_;
111 }
112 if (curRealSpeed_ > maxSpeed_) {
113 maxSpeed_ = curRealSpeed_;
114 }
115 } else {
116 minSpeed_ = 0;
117 maxSpeed_ = 0;
118 curRealSpeed_ = 0;
119 }
120 if (onceTotalProcTime > 0) {
121 curProcSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalProcTime;
122 } else {
123 curProcSpeed_ = 0;
124 }
125 if (onceTotalCnt > 0) {
126 avgRealTime_ = static_cast<double>(onceTotalRealTime) / onceTotalCnt;
127 avgProcessTime_ = static_cast<double>(onceTotalProcTime) / onceTotalCnt;
128 avgWaitTime_ = static_cast<double>(onceTotalWaitTime) / onceTotalCnt;
129 }
130 HIVIEW_LOGD("maxTotalCount_=%{public}u, maxTotalSize_=%{public}u, totalCount_=%{public}u, totalSize_=%{public}u, "
131 "onceTotalRealTime=%{public}u, onceTotalProcTime=%{public}u, onceTotalWaitTime=%{public}u, "
132 "onceTotalCnt=%{public}u, minSpeed_=%{public}u, maxSpeed_=%{public}u, "
133 "curRealSpeed_=%{public}u, curProcSpeed_=%{public}u, "
134 "avgRealTime_=%{public}f, avgProcessTime_=%{public}f, avgWaitTime_=%{public}f",
135 maxTotalCount_.load(), maxTotalSize_.load(), totalCount_, totalSize_,
136 onceTotalRealTime, onceTotalProcTime, onceTotalWaitTime,
137 onceTotalCnt, minSpeed_, maxSpeed_,
138 curRealSpeed_, curProcSpeed_,
139 avgRealTime_, avgProcessTime_, avgWaitTime_);
140 }
141
GetDomainsStat(PerfMeasure & perfMeasure)142 void PlatformMonitor::GetDomainsStat(PerfMeasure &perfMeasure)
143 {
144 std::lock_guard<std::mutex> lock(topMutex_);
145 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
146 perfMeasure.domains.emplace_back(it->first);
147 perfMeasure.domainCounts.emplace_back(it->second);
148 }
149 topDomains_.clear();
150 topEvents_.clear();
151 }
152
GetCostTimeInterval(PerfMeasure & perfMeasure)153 void PlatformMonitor::GetCostTimeInterval(PerfMeasure &perfMeasure)
154 {
155 std::lock_guard<std::mutex> lock(statMutex_);
156 for (int index = 0; index <= static_cast<int>(sizeof(intervals_) / sizeof(intervals_[0])); index++) {
157 uint32_t realCount = realStat_[index];
158 perfMeasure.realCounts.emplace_back(realCount);
159 uint32_t processCount = processStat_[index];
160 perfMeasure.processCounts.emplace_back(processCount);
161 uint32_t waitCount = waitTimeStat_[index];
162 perfMeasure.waitCounts.emplace_back(waitCount);
163 }
164 realStat_.clear();
165 processStat_.clear();
166 waitTimeStat_.clear();
167 }
168
CalcOverBenckMarkPct(PerfMeasure & perfMeasure)169 void PlatformMonitor::CalcOverBenckMarkPct(PerfMeasure &perfMeasure)
170 {
171 perfMeasure.finishedCount = finishedCount_;
172 perfMeasure.overRealTotalCount = overRealTotalCount_;
173 perfMeasure.overProcessTotalCount = overProcessTotalCount_;
174 finishedCount_ = 0;
175 overRealTotalCount_ = 0;
176 overProcessTotalCount_ = 0;
177
178 if (perfMeasure.finishedCount > 0) {
179 perfMeasure.realPercent = (PCT * perfMeasure.overRealTotalCount) / perfMeasure.finishedCount;
180 } else if (perfMeasure.overRealTotalCount > 0) {
181 perfMeasure.realPercent = PCT;
182 }
183
184 if (perfMeasure.finishedCount > 0) {
185 perfMeasure.processpercent = (PCT * perfMeasure.overProcessTotalCount) / perfMeasure.finishedCount;
186 } else if (perfMeasure.overProcessTotalCount > 0) {
187 perfMeasure.processpercent = PCT;
188 }
189 }
190
GetMaxTotalMeasure(PerfMeasure & perfMeasure)191 void PlatformMonitor::GetMaxTotalMeasure(PerfMeasure &perfMeasure)
192 {
193 perfMeasure.maxTotalCount = maxTotalCount_.load();
194 maxTotalCount_.store(0);
195
196 perfMeasure.maxTotalSize = maxTotalSize_.load();
197 maxTotalSize_.store(0);
198 }
199
GetBreakStat(PerfMeasure & perfMeasure)200 void PlatformMonitor::GetBreakStat(PerfMeasure &perfMeasure)
201 {
202 perfMeasure.totalCount = totalCount_;
203 totalCount_ = 0;
204
205 perfMeasure.totalSize = totalSize_;
206 totalSize_ = 0;
207
208 perfMeasure.breakCount = breakCount_;
209 breakCount_ = 0;
210
211 perfMeasure.breakDuration = breakDuration_;
212 breakDuration_ = 0;
213 }
214
GetMaxSpeed(PerfMeasure & perfMeasure) const215 void PlatformMonitor::GetMaxSpeed(PerfMeasure &perfMeasure) const
216 {
217 perfMeasure.minSpeed = minSpeed_;
218 perfMeasure.maxSpeed = maxSpeed_;
219 }
220
CreateProfileReport(PerfMeasure & perfMeasure)221 std::shared_ptr<SysEvent> PlatformMonitor::CreateProfileReport(PerfMeasure &perfMeasure)
222 {
223 SysEventCreator eventCreator("HIVIEWDFX", "PROFILE_STAT", SysEventCreator::STATISTIC);
224 eventCreator.SetKeyValue("MAX_TOTAL_COUNT", perfMeasure.maxTotalCount);
225 eventCreator.SetKeyValue("MAX_TOTAL_SIZE", perfMeasure.maxTotalSize);
226 eventCreator.SetKeyValue("DOMAINS", perfMeasure.domains);
227 eventCreator.SetKeyValue("DOMAIN_DETAIL", perfMeasure.domainCounts);
228 eventCreator.SetKeyValue("TOTAL_COUNT", perfMeasure.totalCount);
229 eventCreator.SetKeyValue("TOTAL_SIZE", perfMeasure.totalSize);
230 eventCreator.SetKeyValue("BREAK_COUNT", perfMeasure.breakCount);
231 eventCreator.SetKeyValue("BREAK_DURATION", perfMeasure.breakDuration);
232 eventCreator.SetKeyValue("MIN_SPEED", perfMeasure.minSpeed);
233 eventCreator.SetKeyValue("MAX_SPEED", perfMeasure.maxSpeed);
234 eventCreator.SetKeyValue("REAL_COUNT", perfMeasure.realCounts);
235 eventCreator.SetKeyValue("PROCESS_COUNT", perfMeasure.processCounts);
236 eventCreator.SetKeyValue("WAIT_COUNT", perfMeasure.waitCounts);
237 eventCreator.SetKeyValue("FINISHED_COUNT", perfMeasure.finishedCount);
238 eventCreator.SetKeyValue("OVER_REAL_COUNT", perfMeasure.overRealTotalCount);
239 eventCreator.SetKeyValue("OVER_REAL_PCT", perfMeasure.realPercent);
240 eventCreator.SetKeyValue("OVER_PROC_COUNT", perfMeasure.overProcessTotalCount);
241 eventCreator.SetKeyValue("OVER_PROC_PCT", perfMeasure.processpercent);
242 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
243 return sysEvent;
244 }
245
ReportCycleProfile()246 void PlatformMonitor::ReportCycleProfile()
247 {
248 HIVIEW_LOGI("report performance profile");
249 PerfMeasure perfMeasure;
250 // report max event size and count
251 GetMaxTotalMeasure(perfMeasure);
252
253 // report event number of each domain
254 GetDomainsStat(perfMeasure);
255
256 // report total number of event, time of break, duration of break
257 GetBreakStat(perfMeasure);
258
259 // report min speed, max speed
260 GetMaxSpeed(perfMeasure);
261
262 // report real time, process time, wait time of cost time interval
263 GetCostTimeInterval(perfMeasure);
264
265 // report percent and total number of over benchmark
266 CalcOverBenckMarkPct(perfMeasure);
267
268 std::shared_ptr<SysEvent> sysEvent = CreateProfileReport(perfMeasure);
269 HIVIEW_LOGI("report=%{public}s", sysEvent->jsonExtraInfo_.c_str());
270 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
271 HIVIEW_LOGI("report performance profile have done");
272 }
273
GetTopDomains(std::vector<std::string> & domains,std::vector<uint32_t> & counts)274 void PlatformMonitor::GetTopDomains(std::vector<std::string> &domains, std::vector<uint32_t> &counts)
275 {
276 std::lock_guard<std::mutex> lock(topMutex_);
277 uint8_t topN = 3; // top n
278 if (topDomains_.size() <= topN) {
279 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
280 domains.emplace_back(it->first);
281 counts.emplace_back(it->second);
282 }
283 return;
284 }
285
286 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
287 counts.emplace_back(it->second);
288 }
289 std::sort(counts.begin(), counts.end(), std::greater<int>());
290 counts.resize(topN);
291 for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
292 if (domains.size() >= topN) {
293 break;
294 }
295 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
296 domains.emplace_back(it->first);
297 }
298 }
299 return;
300 }
301
GetTopEvents(std::vector<std::string> & events,std::vector<uint32_t> & counts)302 void PlatformMonitor::GetTopEvents(std::vector<std::string> &events, std::vector<uint32_t> &counts)
303 {
304 std::lock_guard<std::mutex> lock(topMutex_);
305 uint8_t topN = 3; // top n
306 if (topEvents_.size() <= topN) {
307 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
308 events.emplace_back(it->first);
309 counts.emplace_back(it->second);
310 }
311 return;
312 }
313
314 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
315 counts.emplace_back(it->second);
316 }
317 std::sort(counts.begin(), counts.end(), std::greater<int>());
318 counts.resize(topN);
319 for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
320 if (events.size() >= topN) {
321 break;
322 }
323 if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
324 events.emplace_back(it->first);
325 }
326 }
327 return;
328 }
329
ReportBreakProfile()330 void PlatformMonitor::ReportBreakProfile()
331 {
332 // report current event size and count
333 uint32_t curTotalCount_ = SysEvent::totalCount_;
334 uint32_t curTotalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
335
336 // report current speed
337 uint32_t curRealSpeed = curRealSpeed_;
338 uint32_t curProcessSpeed = curProcSpeed_;
339
340 // report average real time, process time, wait time
341 double avgRealTime = avgRealTime_;
342 double avgProcessTime = avgProcessTime_;
343 double avgWaitTime = avgWaitTime_;
344
345 // report topk cost time event
346 std::vector<std::string> events;
347 std::vector<uint32_t> eventCounts;
348 GetTopEvents(events, eventCounts);
349
350 // report topk event and count
351 std::vector<std::string> domains;
352 std::vector<uint32_t> domainCounts;
353 GetTopDomains(domains, domainCounts);
354 SysEventCreator eventCreator("HIVIEWDFX", "BREAK", SysEventCreator::BEHAVIOR);
355 eventCreator.SetKeyValue("TOTAL_COUNT", curTotalCount_);
356 eventCreator.SetKeyValue("TOTAL_SIZE", curTotalSize_);
357 eventCreator.SetKeyValue("REAL_SPEED", curRealSpeed);
358 eventCreator.SetKeyValue("PROC_SPEED", curProcessSpeed);
359 eventCreator.SetKeyValue("AVG_REAL_TIME", avgRealTime);
360 eventCreator.SetKeyValue("AVG_PROC_TIME", avgProcessTime);
361 eventCreator.SetKeyValue("AVG_WAIT_TIME", avgWaitTime);
362 eventCreator.SetKeyValue("TOP_EVENT", events);
363 eventCreator.SetKeyValue("TOP_EVENT_COUNT", eventCounts);
364 eventCreator.SetKeyValue("TOP_DOMAIN", domains);
365 eventCreator.SetKeyValue("TOP_DOMAIN_COUNT", domainCounts);
366 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
367 HIVIEW_LOGI("report=%{public}s", sysEvent->jsonExtraInfo_.c_str());
368 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
369 }
370
ReportRecoverProfile()371 void PlatformMonitor::ReportRecoverProfile()
372 {
373 // report break duration when recovery
374 int64_t duration = static_cast<int64_t>(recoverTimestamp_ - breakTimestamp_);
375 SysEventCreator eventCreator("HIVIEWDFX", "RECOVER", SysEventCreator::BEHAVIOR);
376 eventCreator.SetKeyValue("DURATION", duration);
377 std::shared_ptr<SysEvent> sysEvent = std::make_shared<SysEvent>("", nullptr, eventCreator);
378 HIVIEW_LOGI("report=%{public}s", sysEvent->jsonExtraInfo_.c_str());
379 HiviewGlobal::GetInstance()->PostSyncEventToTarget(EVENT_SERVICE_PLUGIN, sysEvent);
380 }
381
Breaking()382 void PlatformMonitor::Breaking()
383 {
384 // collect break count and duration every break
385 if (SysEvent::totalSize_ <= totalSizeBenchMark_) {
386 return;
387 }
388
389 HIVIEW_LOGE("break as event reach critical size %{public}u", SysEvent::totalSize_.load());
390 breakTimestamp_ = TimeUtil::GenerateTimestamp();
391 ReportBreakProfile();
392 int64_t recoveryBenchMark = static_cast<int64_t>(totalSizeBenchMark_ * 0.8); // 0.8 of total size will recover
393 while (true) {
394 if (SysEvent::totalSize_ <= recoveryBenchMark) {
395 break;
396 }
397 TimeUtil::Sleep(SLEEP_TEN_SECONDS);
398 }
399 breakCount_++;
400 recoverTimestamp_ = TimeUtil::GenerateTimestamp();
401 breakDuration_ += recoverTimestamp_ - breakTimestamp_;
402 HIVIEW_LOGW("recover after break duration %{public}ld", breakDuration_);
403 ReportRecoverProfile();
404 }
405
InitData()406 void PlatformMonitor::InitData()
407 {
408 MonitorConfig monitorConfig("/system/etc/hiview/monitor.cfg");
409 if (!monitorConfig.Parse()) {
410 return;
411 }
412
413 monitorConfig.ReadParam("collectPeriod", collectPeriod_);
414 monitorConfig.ReadParam("reportPeriod", reportPeriod_);
415 monitorConfig.ReadParam("totalSizeBenchMark", totalSizeBenchMark_);
416 monitorConfig.ReadParam("realTimeBenchMark", realTimeBenchMark_);
417 monitorConfig.ReadParam("processTimeBenchMark", processTimeBenchMark_);
418 }
419
StartMonitor(std::shared_ptr<EventLoop> looper)420 void PlatformMonitor::StartMonitor(std::shared_ptr<EventLoop> looper)
421 {
422 if (looper == nullptr) {
423 HIVIEW_LOGE("can not get share looper");
424 return;
425 }
426 InitData();
427
428 looper_ = looper;
429
430 auto collectTask = std::bind(&PlatformMonitor::CollectPerfProfiler, this);
431 looper_->AddTimerEvent(nullptr, nullptr, collectTask, collectPeriod_, true);
432 auto reportTask = std::bind(&PlatformMonitor::ReportCycleProfile, this);
433 looper_->AddTimerEvent(nullptr, nullptr, reportTask, reportPeriod_, true);
434 }
435 } // namespace HiviewDFX
436 } // namespace OHOS