• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "platform_monitor.h"
16 
17 #include <algorithm>
18 #include <cinttypes>
19 #include <map>
20 #include <memory>
21 #include <mutex>
22 #include <vector>
23 
24 #include "hisysevent.h"
25 #include "hiview_global.h"
26 #include "hiview_logger.h"
27 #include "pipeline.h"
28 #include "sys_event_dao.h"
29 #include "sys_event.h"
30 #include "time_util.h"
31 #include "monitor_config.h"
32 
33 namespace OHOS {
34 namespace HiviewDFX {
35 DEFINE_LOG_TAG("HiView-Monitor");
namespace {
// Pause handed to TimeUtil::Sleep() between two recovery checks in PlatformMonitor::Breaking().
// NOTE(review): the unit is presumably seconds, going by the name - confirm against TimeUtil::Sleep.
constexpr uint8_t SLEEP_TEN_SECONDS = 10;
} // namespace
39 
AccumulateTimeInterval(uint64_t costTime,std::map<int8_t,uint32_t> & stat)40 void PlatformMonitor::AccumulateTimeInterval(uint64_t costTime, std::map<int8_t, uint32_t> &stat)
41 {
42     std::lock_guard<std::mutex> lock(statMutex_);
43     auto lastPos = std::end(intervals_);
44     auto it = std::lower_bound(intervals_, lastPos, costTime);
45     if (it == lastPos) {
46         HIVIEW_LOGD("lower bound base on %{public}" PRIu64 " not found", costTime);
47         return;
48     }
49     int index = it - intervals_;
50     stat[index] += 1;
51 }
52 
CollectEvent(std::shared_ptr<PipelineEvent> event)53 void PlatformMonitor::CollectEvent(std::shared_ptr<PipelineEvent> event)
54 {
55     if (event == nullptr) {
56         return;
57     }
58     std::lock_guard<std::mutex> lock(topMutex_);
59     topDomains_[event->domain_]++;
60     topEvents_[event->eventName_]++;
61 }
62 
CollectCostTime(PipelineEvent * event)63 void PlatformMonitor::CollectCostTime(PipelineEvent *event)
64 {
65     // collect data after event destory
66     if (event == nullptr) {
67         return;
68     }
69     onceTotalCnt_++;
70     onceTotalRealTime_ += event->realtime_;
71     onceTotalProcTime_ += event->processTime_;
72     uint64_t waitTime = event->processTime_ > event->realtime_ ? (event->processTime_ - event->realtime_) : 0;
73     onceTotalWaitTime_ += waitTime;
74     AccumulateTimeInterval(event->realtime_, realStat_);
75     AccumulateTimeInterval(event->processTime_, processStat_);
76     AccumulateTimeInterval(waitTime, waitTimeStat_);
77     if (event->realtime_ > realTimeBenchMark_) {
78         overRealTotalCount_++;
79     }
80     if (event->processTime_ > processTimeBenchMark_) {
81         overProcessTotalCount_++;
82     }
83     finishedCount_++;
84     HIVIEW_LOGD("onceTotalCnt_=%{public}u, onceTotalRealTime_=%{public}u, onceTotalProcTime_=%{public}u, "
85         "onceTotalWaitTime_=%{public}u, overRealTotalCount_=%{public}u, overProcessTotalCount_=%{public}u, "
86         "finishedCount_=%{public}u",
87         onceTotalCnt_, onceTotalRealTime_, onceTotalProcTime_,
88         onceTotalWaitTime_, overRealTotalCount_, overProcessTotalCount_,
89         finishedCount_);
90 }
91 
CollectPerfProfiler()92 void PlatformMonitor::CollectPerfProfiler()
93 {
94     HIVIEW_LOGI("collect performance profiler");
95     // collect data every 5 minute
96     // collect event max size and max count
97     if (maxTotalCount_ < SysEvent::totalCount_) {
98         maxTotalCount_.store(SysEvent::totalCount_);
99     }
100     if (maxTotalSize_ < SysEvent::totalSize_) {
101         maxTotalSize_.store(SysEvent::totalSize_);
102     }
103     // total count, total size
104     totalCount_ = SysEvent::totalCount_;
105     totalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
106     // min speed, max speed
107     uint32_t onceTotalRealTime = onceTotalRealTime_;
108     uint32_t onceTotalProcTime = onceTotalProcTime_;
109     uint32_t onceTotalWaitTime = onceTotalWaitTime_;
110     uint32_t onceTotalCnt = onceTotalCnt_;
111     onceTotalRealTime_ = 0;
112     onceTotalProcTime_ = 0;
113     onceTotalWaitTime_ = 0;
114     onceTotalCnt_ = 0;
115     if (onceTotalRealTime > 0) {
116         curRealSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalRealTime;
117         if (minSpeed_ == 0 || (minSpeed_ > curRealSpeed_)) {
118             minSpeed_ = curRealSpeed_;
119         }
120         if (curRealSpeed_ > maxSpeed_) {
121             maxSpeed_ = curRealSpeed_;
122         }
123     } else {
124         minSpeed_ = 0;
125         maxSpeed_ = 0;
126         curRealSpeed_ = 0;
127     }
128     if (onceTotalProcTime > 0) {
129         curProcSpeed_ = (TimeUtil::SEC_TO_MICROSEC * onceTotalCnt) / onceTotalProcTime;
130     } else {
131         curProcSpeed_ = 0;
132     }
133     if (onceTotalCnt > 0) {
134         avgRealTime_ = static_cast<double>(onceTotalRealTime) / onceTotalCnt;
135         avgProcessTime_ = static_cast<double>(onceTotalProcTime) / onceTotalCnt;
136         avgWaitTime_ = static_cast<double>(onceTotalWaitTime) / onceTotalCnt;
137     }
138     HIVIEW_LOGD("maxTotalCount_=%{public}u, maxTotalSize_=%{public}u, totalCount_=%{public}u, totalSize_=%{public}u, "
139         "onceTotalRealTime=%{public}u, onceTotalProcTime=%{public}u, onceTotalWaitTime=%{public}u, "
140         "onceTotalCnt=%{public}u, minSpeed_=%{public}u, maxSpeed_=%{public}u, "
141         "curRealSpeed_=%{public}u, curProcSpeed_=%{public}u, "
142         "avgRealTime_=%{public}f, avgProcessTime_=%{public}f, avgWaitTime_=%{public}f",
143         maxTotalCount_.load(), maxTotalSize_.load(), totalCount_, totalSize_,
144         onceTotalRealTime, onceTotalProcTime, onceTotalWaitTime,
145         onceTotalCnt, minSpeed_, maxSpeed_,
146         curRealSpeed_, curProcSpeed_,
147         avgRealTime_, avgProcessTime_, avgWaitTime_);
148 }
149 
GetDomainsStat(PerfMeasure & perfMeasure)150 void PlatformMonitor::GetDomainsStat(PerfMeasure &perfMeasure)
151 {
152     std::lock_guard<std::mutex> lock(topMutex_);
153     for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
154         perfMeasure.domains.emplace_back(it->first);
155         perfMeasure.domainCounts.emplace_back(it->second);
156     }
157     topDomains_.clear();
158     topEvents_.clear();
159 }
160 
GetCostTimeInterval(PerfMeasure & perfMeasure)161 void PlatformMonitor::GetCostTimeInterval(PerfMeasure &perfMeasure)
162 {
163     std::lock_guard<std::mutex> lock(statMutex_);
164     for (int index = 0; index <= static_cast<int>(sizeof(intervals_) / sizeof(intervals_[0])); index++) {
165         uint32_t realCount = realStat_[index];
166         perfMeasure.realCounts.emplace_back(realCount);
167         uint32_t processCount = processStat_[index];
168         perfMeasure.processCounts.emplace_back(processCount);
169         uint32_t waitCount = waitTimeStat_[index];
170         perfMeasure.waitCounts.emplace_back(waitCount);
171     }
172     realStat_.clear();
173     processStat_.clear();
174     waitTimeStat_.clear();
175 }
176 
CalcOverBenckMarkPct(PerfMeasure & perfMeasure)177 void PlatformMonitor::CalcOverBenckMarkPct(PerfMeasure &perfMeasure)
178 {
179     perfMeasure.finishedCount = finishedCount_;
180     perfMeasure.overRealTotalCount = overRealTotalCount_;
181     perfMeasure.overProcessTotalCount = overProcessTotalCount_;
182     finishedCount_ = 0;
183     overRealTotalCount_ = 0;
184     overProcessTotalCount_ = 0;
185 
186     if (perfMeasure.finishedCount > 0) {
187         perfMeasure.realPercent = (PCT * perfMeasure.overRealTotalCount) / perfMeasure.finishedCount;
188     } else if (perfMeasure.overRealTotalCount > 0) {
189         perfMeasure.realPercent = PCT;
190     }
191 
192     if (perfMeasure.finishedCount > 0) {
193         perfMeasure.processpercent = (PCT * perfMeasure.overProcessTotalCount) / perfMeasure.finishedCount;
194     } else if (perfMeasure.overProcessTotalCount > 0) {
195         perfMeasure.processpercent = PCT;
196     }
197 }
198 
GetMaxTotalMeasure(PerfMeasure & perfMeasure)199 void PlatformMonitor::GetMaxTotalMeasure(PerfMeasure &perfMeasure)
200 {
201     perfMeasure.maxTotalCount = maxTotalCount_.load();
202     maxTotalCount_.store(0);
203 
204     perfMeasure.maxTotalSize = maxTotalSize_.load();
205     maxTotalSize_.store(0);
206 }
207 
GetBreakStat(PerfMeasure & perfMeasure)208 void PlatformMonitor::GetBreakStat(PerfMeasure &perfMeasure)
209 {
210     perfMeasure.totalCount = totalCount_;
211     totalCount_ = 0;
212 
213     perfMeasure.totalSize = totalSize_;
214     totalSize_ = 0;
215 
216     perfMeasure.breakCount = breakCount_;
217     breakCount_ = 0;
218 
219     perfMeasure.breakDuration = breakDuration_;
220     breakDuration_ = 0;
221 }
222 
GetMaxSpeed(PerfMeasure & perfMeasure) const223 void PlatformMonitor::GetMaxSpeed(PerfMeasure &perfMeasure) const
224 {
225     perfMeasure.minSpeed = minSpeed_;
226     perfMeasure.maxSpeed = maxSpeed_;
227 }
228 
ReportProfile(const PerfMeasure & perfMeasure)229 void PlatformMonitor::ReportProfile(const PerfMeasure& perfMeasure)
230 {
231     int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "PROFILE_STAT", HiSysEvent::EventType::STATISTIC,
232         "MAX_TOTAL_COUNT", perfMeasure.maxTotalCount, "MAX_TOTAL_SIZE", perfMeasure.maxTotalSize,
233         "DOMAINS", perfMeasure.domains, "DOMAIN_DETAIL", perfMeasure.domainCounts,
234         "TOTAL_COUNT", perfMeasure.totalCount, "TOTAL_SIZE", perfMeasure.totalSize,
235         "BREAK_COUNT", perfMeasure.breakCount, "BREAK_DURATION", perfMeasure.breakDuration,
236         "MIN_SPEED", perfMeasure.minSpeed, "MAX_SPEED", perfMeasure.maxSpeed, "REAL_COUNT", perfMeasure.realCounts,
237         "PROCESS_COUNT", perfMeasure.processCounts, "WAIT_COUNT", perfMeasure.waitCounts,
238         "FINISHED_COUNT", perfMeasure.finishedCount, "OVER_REAL_COUNT", perfMeasure.overRealTotalCount,
239         "OVER_REAL_PCT", perfMeasure.realPercent, "OVER_PROC_COUNT", perfMeasure.overProcessTotalCount,
240         "OVER_PROC_PCT", perfMeasure.processpercent);
241     if (ret != SUCCESS) {
242         HIVIEW_LOGE("failed to write PROFILE_STAT event, ret is %{public}d", ret);
243     }
244 }
245 
ReportCycleProfile()246 void PlatformMonitor::ReportCycleProfile()
247 {
248     HIVIEW_LOGI("report performance profile");
249     PerfMeasure perfMeasure;
250     // report max event size and count
251     GetMaxTotalMeasure(perfMeasure);
252 
253     // report event number of each domain
254     GetDomainsStat(perfMeasure);
255 
256     // report total number of event, time of break, duration of break
257     GetBreakStat(perfMeasure);
258 
259     // report min speed, max speed
260     GetMaxSpeed(perfMeasure);
261 
262     // report real time, process time, wait time of cost time interval
263     GetCostTimeInterval(perfMeasure);
264 
265     // report percent and total number of over benchmark
266     CalcOverBenckMarkPct(perfMeasure);
267 
268     ReportProfile(perfMeasure);
269     HIVIEW_LOGI("report performance profile have done");
270 }
271 
GetTopDomains(std::vector<std::string> & domains,std::vector<uint32_t> & counts)272 void PlatformMonitor::GetTopDomains(std::vector<std::string> &domains, std::vector<uint32_t> &counts)
273 {
274     std::lock_guard<std::mutex> lock(topMutex_);
275     uint8_t topN = 3; // top n
276     if (topDomains_.size() <= topN) {
277         for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
278             domains.emplace_back(it->first);
279             counts.emplace_back(it->second);
280         }
281         return;
282     }
283 
284     for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
285         counts.emplace_back(it->second);
286     }
287     std::sort(counts.begin(), counts.end(), std::greater<int>());
288     counts.resize(topN);
289     for (auto it = topDomains_.begin(); it != topDomains_.end(); it++) {
290         if (domains.size() >= topN) {
291             break;
292         }
293         if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
294             domains.emplace_back(it->first);
295         }
296     }
297     return;
298 }
299 
GetTopEvents(std::vector<std::string> & events,std::vector<uint32_t> & counts)300 void PlatformMonitor::GetTopEvents(std::vector<std::string> &events, std::vector<uint32_t> &counts)
301 {
302     std::lock_guard<std::mutex> lock(topMutex_);
303     uint8_t topN = 3; // top n
304     if (topEvents_.size() <= topN) {
305         for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
306             events.emplace_back(it->first);
307             counts.emplace_back(it->second);
308         }
309         return;
310     }
311 
312     for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
313         counts.emplace_back(it->second);
314     }
315     std::sort(counts.begin(), counts.end(), std::greater<int>());
316     counts.resize(topN);
317     for (auto it = topEvents_.begin(); it != topEvents_.end(); it++) {
318         if (events.size() >= topN) {
319             break;
320         }
321         if (std::find(counts.begin(), counts.end(), it->second) != counts.end()) {
322             events.emplace_back(it->first);
323         }
324     }
325     return;
326 }
327 
ReportBreakProfile()328 void PlatformMonitor::ReportBreakProfile()
329 {
330     // report current event size and count
331     uint32_t curTotalCount_ = SysEvent::totalCount_;
332     uint32_t curTotalSize_ = static_cast<uint32_t>(SysEvent::totalSize_);
333 
334     // report current speed
335     uint32_t curRealSpeed = curRealSpeed_;
336     uint32_t curProcessSpeed = curProcSpeed_;
337 
338     // report average real time, process time, wait time
339     double avgRealTime = avgRealTime_;
340     double avgProcessTime = avgProcessTime_;
341     double avgWaitTime = avgWaitTime_;
342 
343     // report topk cost time event
344     std::vector<std::string> events;
345     std::vector<uint32_t> eventCounts;
346     GetTopEvents(events, eventCounts);
347 
348     // report topk event and count
349     std::vector<std::string> domains;
350     std::vector<uint32_t> domainCounts;
351     GetTopDomains(domains, domainCounts);
352     int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "BREAK", HiSysEvent::EventType::BEHAVIOR,
353         "TOTAL_COUNT", curTotalCount_, "TOTAL_SIZE", curTotalSize_, "REAL_SPEED", curRealSpeed,
354         "PROC_SPEED", curProcessSpeed, "AVG_REAL_TIME", avgRealTime, "AVG_PROC_TIME", avgProcessTime,
355         "AVG_WAIT_TIME", avgWaitTime, "TOP_EVENT", events, "TOP_EVENT_COUNT", eventCounts, "TOP_DOMAIN", domains,
356         "TOP_DOMAIN_COUNT", domainCounts);
357     if (ret != SUCCESS) {
358         HIVIEW_LOGE("failed to write BREAK event, ret is %{public}d", ret);
359     }
360 }
361 
ReportRecoverProfile()362 void PlatformMonitor::ReportRecoverProfile()
363 {
364     // report break duration when recovery
365     int64_t duration = static_cast<int64_t>(recoverTimestamp_ - breakTimestamp_);
366     int ret = HiSysEventWrite(HiSysEvent::Domain::HIVIEWDFX, "RECOVER", HiSysEvent::EventType::BEHAVIOR,
367         "DURATION", duration);
368     if (ret != SUCCESS) {
369         HIVIEW_LOGE("failed to write RECOVER event, ret is %{public}d", ret);
370     }
371 }
372 
Breaking()373 void PlatformMonitor::Breaking()
374 {
375     // collect break count and duration every break
376     if (SysEvent::totalSize_ <= totalSizeBenchMark_) {
377         return;
378     }
379 
380     HIVIEW_LOGE("break as event reach critical size %{public}" PRId64, SysEvent::totalSize_.load());
381     breakTimestamp_ = TimeUtil::GenerateTimestamp();
382     ReportBreakProfile();
383     int64_t recoveryBenchMark = static_cast<int64_t>(totalSizeBenchMark_ * 0.8); // 0.8 of total size will recover
384     while (true) {
385         if (SysEvent::totalSize_ <= recoveryBenchMark) {
386             break;
387         }
388         TimeUtil::Sleep(SLEEP_TEN_SECONDS);
389     }
390     breakCount_++;
391     recoverTimestamp_ = TimeUtil::GenerateTimestamp();
392     breakDuration_ += recoverTimestamp_ - breakTimestamp_;
393     HIVIEW_LOGW("recover after break duration %{public}" PRIu64, breakDuration_);
394     ReportRecoverProfile();
395 }
396 
InitData()397 void PlatformMonitor::InitData()
398 {
399     MonitorConfig monitorConfig("/system/etc/hiview/monitor.cfg");
400     if (!monitorConfig.Parse()) {
401         return;
402     }
403 
404     monitorConfig.ReadParam("collectPeriod", collectPeriod_);
405     monitorConfig.ReadParam("reportPeriod", reportPeriod_);
406     monitorConfig.ReadParam("totalSizeBenchMark", totalSizeBenchMark_);
407     monitorConfig.ReadParam("realTimeBenchMark", realTimeBenchMark_);
408     monitorConfig.ReadParam("processTimeBenchMark", processTimeBenchMark_);
409 }
410 
StartMonitor(std::shared_ptr<EventLoop> looper)411 void PlatformMonitor::StartMonitor(std::shared_ptr<EventLoop> looper)
412 {
413     if (looper == nullptr) {
414         HIVIEW_LOGE("can not get share looper");
415         return;
416     }
417     InitData();
418 
419     looper_ = looper;
420 
421     auto collectTask = std::bind(&PlatformMonitor::CollectPerfProfiler, this);
422     looper_->AddTimerEvent(nullptr, nullptr, collectTask, collectPeriod_, true);
423     auto reportTask = std::bind(&PlatformMonitor::ReportCycleProfile, this);
424     looper_->AddTimerEvent(nullptr, nullptr, reportTask, reportPeriod_, true);
425 }
426 } // namespace HiviewDFX
427 } // namespace OHOS
428