1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #ifdef FFRT_BBOX_ENABLE
16
17 #include "bbox.h"
18 #include <sys/syscall.h>
19 #include <unistd.h>
20 #include <csignal>
21 #include <cstdlib>
22 #include <string>
23 #include <sstream>
24 #ifdef FFRT_CO_BACKTRACE_ENABLE
25 #include <utils/CallStack.h>
26 #include "core/task_ctx.h"
27 #endif
28 #include "dfx/log/ffrt_log_api.h"
29 #include "sched/scheduler.h"
30
// Task life-cycle counters; each is bumped by the Task*CounterInc() function of the same name.
static std::atomic_uint g_taskSubmitCounter {0};
static std::atomic_uint g_taskDoneCounter {0};
static std::atomic_uint g_taskEnQueueCounter {0};
static std::atomic_uint g_taskRunCounter {0};
static std::atomic_uint g_taskSwitchCounter {0};
static std::atomic_uint g_taskFinishCounter {0};

// Signal dispositions captured before the black box handler is installed, indexed by signal number.
static struct sigaction s_oldSa[SIGSYS + 1]; // SIGSYS = 31
39
40 using namespace ffrt;
41
TaskSubmitCounterInc(void)42 void TaskSubmitCounterInc(void)
43 {
44 ++g_taskSubmitCounter;
45 }
46
TaskDoneCounterInc(void)47 void TaskDoneCounterInc(void)
48 {
49 ++g_taskDoneCounter;
50 }
51
TaskEnQueuCounterInc(void)52 void TaskEnQueuCounterInc(void)
53 {
54 ++g_taskEnQueueCounter;
55 }
56
TaskRunCounterInc(void)57 void TaskRunCounterInc(void)
58 {
59 ++g_taskRunCounter;
60 }
61
TaskSwitchCounterInc(void)62 void TaskSwitchCounterInc(void)
63 {
64 ++g_taskSwitchCounter;
65 }
66
TaskFinishCounterInc(void)67 void TaskFinishCounterInc(void)
68 {
69 ++g_taskFinishCounter;
70 }
71
SaveCurrent()72 static inline void SaveCurrent()
73 {
74 FFRT_BBOX_LOG("<<<=== current status ===>>>");
75 auto t = ExecuteCtx::Cur()->task;
76 if (t) {
77 FFRT_BBOX_LOG("current: thread id %u, task id %lu, qos %d, name %s", gettid(),
78 t->gid, t->qos(), t->label.c_str());
79 }
80
81 const int IGNORE_DEPTH = 3;
82 backtrace(IGNORE_DEPTH);
83 }
84
SaveTaskCounter()85 static inline void SaveTaskCounter()
86 {
87 FFRT_BBOX_LOG("<<<=== task counter ===>>>");
88 FFRT_BBOX_LOG("FFRT BBOX TaskSubmitCounter:%u TaskEnQueueCounter:%u TaskDoneCounter:%u",
89 g_taskSubmitCounter.load(), g_taskEnQueueCounter.load(), g_taskDoneCounter.load());
90 FFRT_BBOX_LOG("FFRT BBOX TaskRunCounter:%u TaskSwitchCounter:%u TaskFinishCounter:%u", g_taskRunCounter.load(),
91 g_taskSwitchCounter.load(), g_taskFinishCounter.load());
92 if (g_taskSwitchCounter.load() + g_taskFinishCounter.load() == g_taskRunCounter.load()) {
93 FFRT_BBOX_LOG("TaskRunCounter equals TaskSwitchCounter + TaskFinishCounter");
94 } else {
95 FFRT_BBOX_LOG("TaskRunCounter is not equal to TaskSwitchCounter + TaskFinishCounter");
96 }
97 }
98
SaveWorkerStatus()99 static inline void SaveWorkerStatus()
100 {
101 WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
102 FFRT_BBOX_LOG("<<<=== worker status ===>>>");
103 for (int i = 0; i < static_cast<int>(qos_max) + 1; i++) {
104 std::unique_lock lock(workerGroup[i].tgMutex);
105 for (auto& thread : workerGroup[i].threads) {
106 TaskCtx* t = thread.first->curTask;
107 if (t == nullptr) {
108 FFRT_BBOX_LOG("qos %d: worker tid %d is running nothing", i, thread.first->Id());
109 continue;
110 }
111 FFRT_BBOX_LOG("qos %d: worker tid %d is running task id %lu name %s", i, thread.first->Id(),
112 t->gid, t->label.c_str());
113 }
114 }
115 }
116
SaveReadyQueueStatus()117 static inline void SaveReadyQueueStatus()
118 {
119 FFRT_BBOX_LOG("<<<=== ready queue status ===>>>");
120 for (int i = 0; i < static_cast<int>(qos_max) + 1; i++) {
121 int nt = FFRTScheduler::Instance()->GetScheduler(QoS(i)).RQSize();
122 if (!nt) {
123 continue;
124 }
125
126 for (int j = 0; j < nt; j++) {
127 TaskCtx* t = FFRTScheduler::Instance()->GetScheduler(QoS(i)).PickNextTask();
128 if (t == nullptr) {
129 FFRT_BBOX_LOG("qos %d: ready queue task <%d/%d> null", i + 1, j, nt);
130 continue;
131 }
132 FFRT_BBOX_LOG("qos %d: ready queue task <%d/%d> id %lu name %s",
133 i + 1, j, nt, t->gid, t->label.c_str());
134 }
135 }
136 }
137
SaveTaskStatus()138 static inline void SaveTaskStatus()
139 {
140 auto unfree = SimpleAllocator<TaskCtx>::getUnfreedMem();
141 auto apply = [&](const char* tag, const std::function<bool(TaskCtx*)>& filter) {
142 decltype(unfree) tmp;
143 for (auto t : unfree) {
144 if (filter(t)) {
145 tmp.emplace_back(t);
146 }
147 }
148
149 if (tmp.size() > 0) {
150 FFRT_BBOX_LOG("<<<=== %s ===>>>", tag);
151 }
152 size_t idx = 1;
153 for (auto t : tmp) {
154 FFRT_BBOX_LOG("<%zu/%lu> id %lu qos %d name %s", idx++,
155 tmp.size(), t->gid, t->qos(), t->label.c_str());
156 if (t->coRoutine && (t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH))) {
157 CoStart(t);
158 }
159 }
160 };
161
162 apply("blocked by synchronization primitive(mutex etc)", [](TaskCtx* t) {
163 return (t->state == TaskState::RUNNING) && t->coRoutine &&
164 t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH);
165 });
166 apply("blocked by task dependence", [](TaskCtx* t) {
167 return t->state == TaskState::BLOCKED;
168 });
169 apply("pending task", [](TaskCtx* t) {
170 return t->state == TaskState::PENDING;
171 });
172 }
173
// Mutex / condition variable pair that BboxFreeze() waits on.
static std::mutex g_bbox_mtx;
static std::condition_variable g_bbox_cv;
// Tid of the thread currently saving the black box; 0 when no save is in progress.
static std::atomic<unsigned int> g_bbox_tid_is_dealing(0);
// Number of times a thread has entered the save section of SaveTheBbox().
static std::atomic<unsigned int> g_bbox_called_times(0);
178
BboxFreeze()179 void BboxFreeze()
180 {
181 std::unique_lock<std::mutex> lk(g_bbox_mtx);
182 g_bbox_cv.wait(lk, [] { return g_bbox_tid_is_dealing.load() == 0; });
183 }
184
backtrace(int ignoreDepth)185 void backtrace(int ignoreDepth)
186 {
187 FFRT_BBOX_LOG("backtrace");
188
189 #ifdef FFRT_CO_BACKTRACE_OH_ENABLE
190 std::string dumpInfo;
191 TaskCtx::DumpTask(nullptr, dumpInfo);
192 #endif
193 }
194
GetBboxEnableState(void)195 unsigned int GetBboxEnableState(void)
196 {
197 return g_bbox_tid_is_dealing.load();
198 }
199
FFRTIsWork()200 bool FFRTIsWork()
201 {
202 if (g_taskSubmitCounter.load() == 0) {
203 return false;
204 } else if (g_taskSubmitCounter.load() == g_taskDoneCounter.load()) {
205 FFRT_BBOX_LOG("ffrt already finished, TaskSubmitCounter:%u, TaskDoneCounter:%u",
206 g_taskSubmitCounter.load(), g_taskDoneCounter.load());
207 return false;
208 }
209
210 return true;
211 }
212
SaveTheBbox()213 void SaveTheBbox()
214 {
215 unsigned int expect = 0;
216 unsigned int tid = static_cast<unsigned int>(gettid());
217 if (!g_bbox_tid_is_dealing.compare_exchange_strong(expect, tid)) {
218 if (tid == g_bbox_tid_is_dealing.load()) {
219 FFRT_BBOX_LOG("thread %u black box save failed", tid);
220 g_bbox_tid_is_dealing.store(0);
221 g_bbox_cv.notify_all();
222 } else {
223 FFRT_BBOX_LOG("thread %u trigger signal again, when thread %u is saving black box",
224 tid, g_bbox_tid_is_dealing.load());
225 BboxFreeze(); // hold other thread's signal resend
226 }
227 return;
228 }
229
230 if (g_bbox_called_times.fetch_add(1) == 0) { // only save once
231 FFRT_BBOX_LOG("<<<=== ffrt black box(BBOX) start ===>>>");
232 SaveCurrent();
233 SaveTaskCounter();
234 SaveWorkerStatus();
235 SaveReadyQueueStatus();
236 SaveTaskStatus();
237 FFRT_BBOX_LOG("<<<=== ffrt black box(BBOX) finish ===>>>");
238 }
239
240 std::lock_guard lk(g_bbox_mtx);
241 g_bbox_tid_is_dealing.store(0);
242 g_bbox_cv.notify_all();
243 }
244
ResendSignal(siginfo_t * info)245 static void ResendSignal(siginfo_t* info)
246 {
247 int rc = syscall(SYS_rt_tgsigqueueinfo, getpid(), syscall(SYS_gettid), info->si_signo, info);
248 if (rc != 0) {
249 FFRT_BBOX_LOG("ffrt failed to resend signal during crash");
250 }
251 }
252
// Maps the signal number stored in `info` to its symbolic name for logging.
// Returns "?" for any signal that is not listed.
static const char* GetSigName(const siginfo_t* info)
{
    switch (info->si_signo) {
        case SIGABRT: return "SIGABRT";
        case SIGBUS: return "SIGBUS";
        case SIGFPE: return "SIGFPE";
        case SIGILL: return "SIGILL";
        case SIGINT: return "SIGINT"; // a handler is registered for SIGINT, so name it too
        case SIGSEGV: return "SIGSEGV";
        case SIGSTKFLT: return "SIGSTKFLT";
        case SIGSTOP: return "SIGSTOP";
        case SIGSYS: return "SIGSYS";
        case SIGTRAP: return "SIGTRAP";
        default: return "?";
    }
}
268
SignalHandler(int signo,siginfo_t * info,void * context)269 static void SignalHandler(int signo, siginfo_t* info, void* context __attribute__((unused)))
270 {
271 if (FFRTIsWork()) {
272 FFRT_BBOX_LOG("recv signal %d (%s) code %d", signo, GetSigName(info), info->si_code);
273 SaveTheBbox();
274 }
275
276 // we need to deregister our signal handler for that signal before continuing.
277 sigaction(signo, &s_oldSa[signo], nullptr);
278 ResendSignal(info);
279 }
280
SignalReg(int signo)281 static void SignalReg(int signo)
282 {
283 sigaction(signo, nullptr, &s_oldSa[signo]);
284 struct sigaction newAction;
285 newAction.sa_flags = SA_RESTART | SA_SIGINFO;
286 newAction.sa_sigaction = SignalHandler;
287 sigaction(signo, &newAction, nullptr);
288 }
289
BBoxInit()290 __attribute__((constructor)) static void BBoxInit()
291 {
292 SignalReg(SIGABRT);
293 SignalReg(SIGBUS);
294 SignalReg(SIGFPE);
295 SignalReg(SIGILL);
296 SignalReg(SIGSEGV);
297 SignalReg(SIGSTKFLT);
298 SignalReg(SIGSYS);
299 SignalReg(SIGTRAP);
300 SignalReg(SIGINT);
301 SignalReg(SIGKILL);
302 }
303
304 #ifdef FFRT_CO_BACKTRACE_OH_ENABLE
SaveTaskCounterInfo(void)305 std::string SaveTaskCounterInfo(void)
306 {
307 std::ostringstream ss;
308 ss << "<<<=== task counter ===>>>" << std::endl;
309 ss << "FFRT BBOX TaskSubmitCounter:" << g_taskSubmitCounter.load() << " TaskEnQueueCounter:"
310 << g_taskEnQueueCounter.load() << " TaskDoneCounter:" << g_taskDoneCounter.load() << std::endl;
311 ss << "FFRT BBOX TaskRunCounter:" << g_taskRunCounter.load() << " TaskSwitchCounter:"
312 << g_taskSwitchCounter.load() << " TaskFinishCounter:" << g_taskFinishCounter.load() << std::endl;
313
314 if (g_taskSwitchCounter.load() + g_taskFinishCounter.load() == g_taskRunCounter.load()) {
315 ss << "TaskRunCounter equals TaskSwitchCounter + TaskFinishCounter" << std::endl;
316 } else {
317 ss << "TaskRunCounter is not equal to TaskSwitchCounter + TaskFinishCounter" << std::endl;
318 }
319 return ss.str();
320 }
321
SaveWorkerStatusInfo(void)322 std::string SaveWorkerStatusInfo(void)
323 {
324 std::ostringstream ss;
325 WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
326 ss << "<<<=== worker status ===>>>" << std::endl;
327 for (int i = 0; i < static_cast<int>(qos_max) + 1; i++) {
328 std::unique_lock lock(workerGroup[i].tgMutex);
329 for (auto& thread : workerGroup[i].threads) {
330 TaskCtx* t = thread.first->curTask;
331 if (t == nullptr) {
332 ss << "qos " << i << ": worker tid " << thread.first->Id()
333 << " is running nothing" << std::endl;
334 continue;
335 }
336 ss << "qos " << i << ": worker tid " << thread.first->Id()
337 << " is running task id " << t->gid << " name " << t->label.c_str() << std::endl;
338 }
339 }
340 return ss.str();
341 }
342
SaveReadyQueueStatusInfo()343 std::string SaveReadyQueueStatusInfo()
344 {
345 std::ostringstream ss;
346 ss << "<<<=== ready queue status ===>>>" << std::endl;
347 for (int i = 0; i < static_cast<int>(qos_max) + 1; i++) {
348 int nt = FFRTScheduler::Instance()->GetScheduler(QoS(i)).RQSize();
349 if (!nt) {
350 continue;
351 }
352
353 for (int j = 0; j < nt; j++) {
354 TaskCtx* t = FFRTScheduler::Instance()->GetScheduler(QoS(i)).PickNextTask();
355 if (t == nullptr) {
356 ss << "qos " << (i + 1) << ": ready queue task <" << j << "/" << nt << ">"
357 << " null" << std::endl;
358 continue;
359 }
360 ss << "qos " << (i + 1) << ": ready queue task <" << j << "/" << nt << "> id "
361 << t->gid << " name " << t->label.c_str() << std::endl;
362 }
363 }
364 return ss.str();
365 }
366
SaveTaskStatusInfo(void)367 std::string SaveTaskStatusInfo(void)
368 {
369 std::string ffrtStackInfo;
370 std::ostringstream ss;
371 auto unfree = SimpleAllocator<TaskCtx>::getUnfreedMem();
372 auto apply = [&](const char* tag, const std::function<bool(TaskCtx*)>& filter) {
373 decltype(unfree) tmp;
374 for (auto t : unfree) {
375 if (filter(t)) {
376 tmp.emplace_back(t);
377 }
378 }
379
380 if (tmp.size() > 0) {
381 ss << "<<<=== " << tag << "===>>>" << std::endl;
382 ffrtStackInfo += ss.str();
383 }
384 size_t idx = 1;
385 for (auto t : tmp) {
386 ss.str("");
387 ss << "<" << idx++ << "/" << tmp.size() << ">" << "id" << t->gid << "qos"
388 << t->qos() << "name" << t->label.c_str() << std::endl;
389 ffrtStackInfo += ss.str();
390 if (t->coRoutine && (t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH))) {
391 std::string dumpInfo;
392 TaskCtx::DumpTask(t, dumpInfo, 1);
393 ffrtStackInfo += dumpInfo;
394 }
395 }
396 };
397
398 apply("blocked by synchronization primitive(mutex etc)", [](TaskCtx* t) {
399 return (t->state == TaskState::RUNNING) && t->coRoutine &&
400 t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH);
401 });
402 apply("blocked by task dependence", [](TaskCtx* t) {
403 return t->state == TaskState::BLOCKED;
404 });
405 apply("pending task", [](TaskCtx* t) {
406 return t->state == TaskState::PENDING;
407 });
408
409 return ffrtStackInfo;
410 }
411 #endif
412 #endif /* FFRT_BBOX_ENABLE */