1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "worker_monitor.h"
17 #include <cstring>
18 #include <dlfcn.h>
19 #ifdef FFRT_OH_TRACE_ENABLE
20 #include "backtrace_local.h"
21 #endif
22
23 #include "eu/execute_unit.h"
24 #include "eu/worker_manager.h"
25 #include "internal_inc/osal.h"
26
namespace {
// Size, in bytes, of the stack buffer that receives the process name in the constructor.
constexpr uint64_t PROCESS_NAME_BUFFER_LENGTH = 1024;

// Delay between two consecutive worker status samples, in microseconds (500 ms).
constexpr uint64_t MONITOR_SAMPLING_CYCLE_US = 500 * 1000;

// Number of consecutive samples that must observe the same task on a worker
// before the task is reported as occupying the worker.
constexpr uint64_t MONITOR_TIMEOUT_MAX_COUNT = 2;

// Period, in microseconds (60 s), used to derive the counter value a worker is
// reset to after a report, which spaces out repeated reports for the same task.
constexpr uint64_t TIMEOUT_RECORD_CYCLE_US = 60 * 1000 * 1000;

// Signature of the function pointer that is read from in front of a task object.
using UvFunc = void(*)(void*);

// Distance, in bytes, between that function pointer and the start of the task object.
constexpr unsigned int UV_FUNC_OFFSET = 2 * sizeof(uintptr_t);
}
36
37 namespace ffrt {
WorkerMonitor()38 WorkerMonitor::WorkerMonitor()
39 {
40 char processName[PROCESS_NAME_BUFFER_LENGTH];
41 GetProcessName(processName, PROCESS_NAME_BUFFER_LENGTH);
42 // hdc在调用hdc shell的时候会长期占用worker,过滤该进程以防止一直打印超时信息
43 // 另外,对hdc进程进行监控会概率性导致hdc断连,原因未知,暂时规避
44 skipSampling_ = (strstr(processName, "hdcd") != nullptr);
45 }
46
~WorkerMonitor()47 WorkerMonitor::~WorkerMonitor()
48 {
49 skipSampling_ = true;
50 }
51
SubmitSamplingTask()52 void WorkerMonitor::SubmitSamplingTask()
53 {
54 if (skipSampling_) {
55 return;
56 }
57
58 waitEntry_.tp = std::chrono::steady_clock::now() + std::chrono::microseconds(MONITOR_SAMPLING_CYCLE_US);
59 waitEntry_.cb = ([this](WaitEntry* we) { CheckWorkerStatus(); });
60 if (!DelayedWakeup(waitEntry_.tp, &waitEntry_, waitEntry_.cb)) {
61 FFRT_LOGW("Set delayed worker failed.");
62 }
63 }
64
CheckWorkerStatus()65 void WorkerMonitor::CheckWorkerStatus()
66 {
67 WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
68 QoS _qos = QoS(static_cast<int>(qos_max));
69 for (int i = 0; i < _qos() + 1; i++) {
70 std::shared_lock<std::shared_mutex> lck(workerGroup[i].tgMutex);
71 for (auto& thread : workerGroup[i].threads) {
72 WorkerThread* worker = thread.first;
73 CPUEUTask* workerTask = worker->curTask;
74 if (workerTask == nullptr) {
75 workerStatus_.erase(worker);
76 continue;
77 }
78
79 // only support uv task
80 if (!(workerTask->type != ffrt_normal_task && workerTask->type != ffrt_io_task)) {
81 continue;
82 }
83
84 RecordTimeoutFunctionInfo(worker, workerTask);
85 }
86 }
87
88 SubmitSamplingTask();
89 }
90
RecordTimeoutFunctionInfo(WorkerThread * worker,CPUEUTask * workerTask)91 void WorkerMonitor::RecordTimeoutFunctionInfo(WorkerThread* worker, CPUEUTask* workerTask)
92 {
93 auto workerIter = workerStatus_.find(worker);
94 if (workerIter == workerStatus_.end()) {
95 workerStatus_[worker] = { workerTask, 0 };
96 return;
97 }
98
99 if (workerIter->second.first == workerTask) {
100 if (++workerIter->second.second >= MONITOR_TIMEOUT_MAX_COUNT) {
101 RecordSymbolAndBacktrace(workerTask, worker->Id());
102 workerIter->second.second =
103 -static_cast<int>(TIMEOUT_RECORD_CYCLE_US / MONITOR_SAMPLING_CYCLE_US - MONITOR_TIMEOUT_MAX_COUNT);
104 }
105 return;
106 }
107
108 workerIter->second = { workerTask, 0 };
109 }
110
RecordSymbolAndBacktrace(CPUEUTask * task,int tid)111 void WorkerMonitor::RecordSymbolAndBacktrace(CPUEUTask* task, int tid)
112 {
113 void* func = nullptr;
114 if (task->type != 0) {
115 UvFunc* funcAddr = reinterpret_cast<UvFunc*>(reinterpret_cast<char*>(task) - UV_FUNC_OFFSET);
116 func = reinterpret_cast<void*>(*funcAddr);
117 }
118
119 Dl_info info;
120 if (dladdr(func, &info)) {
121 FFRT_LOGW("Function [%s] in [%s] occupies worker for more than 1s.",
122 (info.dli_sname ? info.dli_sname : "unknown"), (info.dli_fname ? info.dli_fname : "unknown"));
123
124 #ifdef FFRT_OF_TRACE_ENABLE
125 std::string dumpInfo;
126 if (OHOS::HiviewDFX::GetBacktraceStringByTid(dumpInfo, tid, 0, false)) {
127 FFRT_LOGW("Backtrace:\n%s", dumpInfo.c_str());
128 }
129 #endif
130 }
131 }
132 }