1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #ifdef FFRT_BBOX_ENABLE
16
17 #include "bbox.h"
18 #include <sys/syscall.h>
19 #include <unistd.h>
20 #include <csignal>
21 #include <cstdlib>
22 #include <string>
23 #include <sstream>
24 #include "dfx/log/ffrt_log_api.h"
25 #include "sched/scheduler.h"
26
27 using namespace ffrt;
28
29 static std::atomic<unsigned int> g_taskSubmitCounter(0);
30 static std::atomic<unsigned int> g_taskDoneCounter(0);
31 static std::atomic<unsigned int> g_taskEnQueueCounter(0);
32 static std::atomic<unsigned int> g_taskRunCounter(0);
33 static std::atomic<unsigned int> g_taskSwitchCounter(0);
34 static std::atomic<unsigned int> g_taskFinishCounter(0);
35 #ifdef FFRT_IO_TASK_SCHEDULER
36 static std::atomic<unsigned int> g_taskPendingCounter(0);
37 static std::atomic<unsigned int> g_taskWakeCounter(0);
38 #endif
39 static CPUEUTask* g_cur_task;
40 std::mutex bbox_handle_lock;
41 std::condition_variable bbox_handle_end;
42
43 static struct sigaction s_oldSa[SIGSYS + 1]; // SIGSYS = 31
44
TaskSubmitCounterInc(void)45 void TaskSubmitCounterInc(void)
46 {
47 ++g_taskSubmitCounter;
48 }
49
50 #ifdef FFRT_IO_TASK_SCHEDULER
TaskWakeCounterInc(void)51 void TaskWakeCounterInc(void)
52 {
53 ++g_taskWakeCounter;
54 }
55 #endif
56
TaskDoneCounterInc(void)57 void TaskDoneCounterInc(void)
58 {
59 ++g_taskDoneCounter;
60 }
61
TaskEnQueuCounterInc(void)62 void TaskEnQueuCounterInc(void)
63 {
64 ++g_taskEnQueueCounter;
65 }
66
TaskRunCounterInc(void)67 void TaskRunCounterInc(void)
68 {
69 ++g_taskRunCounter;
70 }
71
TaskSwitchCounterInc(void)72 void TaskSwitchCounterInc(void)
73 {
74 ++g_taskSwitchCounter;
75 }
76
TaskFinishCounterInc(void)77 void TaskFinishCounterInc(void)
78 {
79 ++g_taskFinishCounter;
80 }
81
82 #ifdef FFRT_IO_TASK_SCHEDULER
TaskPendingCounterInc(void)83 void TaskPendingCounterInc(void)
84 {
85 ++g_taskPendingCounter;
86 }
87 #endif
88
SaveCurrent()89 static inline void SaveCurrent()
90 {
91 FFRT_BBOX_LOG("<<<=== current status ===>>>");
92 auto t = g_cur_task;
93 if (t) {
94 if (t->type == 0) {
95 FFRT_BBOX_LOG("current: thread id %u, task id %lu, qos %d, name %s", gettid(),
96 t->gid, t->qos(), t->label.c_str());
97 }
98 }
99 }
100
SaveTaskCounter()101 static inline void SaveTaskCounter()
102 {
103 FFRT_BBOX_LOG("<<<=== task counter ===>>>");
104 FFRT_BBOX_LOG("FFRT BBOX TaskSubmitCounter:%u TaskEnQueueCounter:%u TaskDoneCounter:%u",
105 g_taskSubmitCounter.load(), g_taskEnQueueCounter.load(), g_taskDoneCounter.load());
106 FFRT_BBOX_LOG("FFRT BBOX TaskRunCounter:%u TaskSwitchCounter:%u TaskFinishCounter:%u", g_taskRunCounter.load(),
107 g_taskSwitchCounter.load(), g_taskFinishCounter.load());
108 #ifdef FFRT_IO_TASK_SCHEDULER
109 FFRT_BBOX_LOG("FFRT BBOX TaskWakeCounterInc:%u, TaskPendingCounter:%u",
110 g_taskWakeCounter.load(), g_taskPendingCounter.load());
111 #endif
112 if (g_taskSwitchCounter.load() + g_taskFinishCounter.load() == g_taskRunCounter.load()) {
113 FFRT_BBOX_LOG("TaskRunCounter equals TaskSwitchCounter + TaskFinishCounter");
114 } else {
115 FFRT_BBOX_LOG("TaskRunCounter is not equal to TaskSwitchCounter + TaskFinishCounter");
116 }
117 }
118
SaveWorkerStatus()119 static inline void SaveWorkerStatus()
120 {
121 WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
122 FFRT_BBOX_LOG("<<<=== worker status ===>>>");
123 ffrt::QoS _qos = ffrt::QoS(static_cast<int>(qos_max));
124 for (int i = 0; i < _qos() + 1; i++) {
125 std::shared_lock<std::shared_mutex> lck(workerGroup[i].tgMutex);
126 for (auto& thread : workerGroup[i].threads) {
127 CPUEUTask* t = thread.first->curTask;
128 if (t == nullptr) {
129 FFRT_BBOX_LOG("qos %d: worker tid %d is running nothing", i, thread.first->Id());
130 continue;
131 }
132 if (t->type == 0) {
133 FFRT_BBOX_LOG("qos %d: worker tid %d is running task id %lu name %s", i, thread.first->Id(),
134 t->gid, t->label.c_str());
135 }
136 }
137 }
138 }
139
SaveReadyQueueStatus()140 static inline void SaveReadyQueueStatus()
141 {
142 FFRT_BBOX_LOG("<<<=== ready queue status ===>>>");
143 ffrt::QoS _qos = ffrt::QoS(static_cast<int>(qos_max));
144 for (int i = 0; i < _qos() + 1; i++) {
145 int nt = FFRTScheduler::Instance()->GetScheduler(QoS(i)).RQSize();
146 if (!nt) {
147 continue;
148 }
149
150 for (int j = 0; j < nt; j++) {
151 CPUEUTask* t = FFRTScheduler::Instance()->GetScheduler(QoS(i)).PickNextTask();
152 if (t == nullptr) {
153 FFRT_BBOX_LOG("qos %d: ready queue task <%d/%d> null", i + 1, j, nt);
154 continue;
155 }
156 if (t->type == 0) {
157 FFRT_BBOX_LOG("qos %d: ready queue task <%d/%d> id %lu name %s",
158 i + 1, j, nt, t->gid, t->label.c_str());
159 }
160 }
161 }
162 }
163
SaveTaskStatus()164 static inline void SaveTaskStatus()
165 {
166 auto unfree = SimpleAllocator<CPUEUTask>::getUnfreedMem();
167 auto apply = [&](const char* tag, const std::function<bool(CPUEUTask*)>& filter) {
168 decltype(unfree) tmp;
169 for (auto t : unfree) {
170 if (filter(t)) {
171 tmp.emplace_back(t);
172 }
173 }
174
175 if (tmp.size() > 0) {
176 FFRT_BBOX_LOG("<<<=== %s ===>>>", tag);
177 }
178 size_t idx = 1;
179 for (auto t : tmp) {
180 if (t->type == 0) {
181 FFRT_BBOX_LOG("<%zu/%lu> id %lu qos %d name %s", idx++,
182 tmp.size(), t->gid, t->qos(), t->label.c_str());
183 }
184 if (t->coRoutine && (t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH))
185 && t != g_cur_task) {
186 CoStart(t);
187 }
188 }
189 };
190
191 apply("blocked by synchronization primitive(mutex etc)", [](CPUEUTask* t) {
192 return (t->state == TaskState::RUNNING) && t->coRoutine &&
193 t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH) && t != g_cur_task;
194 });
195 apply("blocked by task dependence", [](CPUEUTask* t) {
196 return t->state == TaskState::BLOCKED;
197 });
198 apply("pending task", [](CPUEUTask* t) {
199 return t->state == TaskState::PENDING;
200 });
201 }
202
// Thread id of the thread that is currently writing the black box, 0 when nobody is.
static std::atomic_uint g_bbox_tid_is_dealing(0);
// Number of times SaveTheBbox() has been entered.
static std::atomic_uint g_bbox_called_times(0);
// Condition variable / mutex pair used to park threads in BboxFreeze().
static std::condition_variable g_bbox_cv;
static std::mutex g_bbox_mtx;

// Blocks the caller until g_bbox_tid_is_dealing reads 0; returns at once if it already does.
void BboxFreeze()
{
    std::unique_lock<std::mutex> guard(g_bbox_mtx);
    while (g_bbox_tid_is_dealing.load() != 0) {
        g_bbox_cv.wait(guard);
    }
}
213
backtrace(int ignoreDepth)214 void backtrace(int ignoreDepth)
215 {
216 FFRT_BBOX_LOG("backtrace");
217 #ifdef FFRT_CO_BACKTRACE_OH_ENABLE
218 std::string dumpInfo;
219 CPUEUTask::DumpTask(nullptr, dumpInfo);
220 #endif
221 }
222
GetBboxEnableState(void)223 unsigned int GetBboxEnableState(void)
224 {
225 return g_bbox_tid_is_dealing.load();
226 }
227
FFRTIsWork()228 bool FFRTIsWork()
229 {
230 if (g_taskSubmitCounter.load() == 0) {
231 return false;
232 } else if (g_taskSubmitCounter.load() == g_taskDoneCounter.load()) {
233 return false;
234 }
235
236 return true;
237 }
238
SaveTheBbox()239 void SaveTheBbox()
240 {
241 if (g_bbox_called_times.fetch_add(1) == 0) { // only save once
242 std::thread([&]() {
243 unsigned int expect = 0;
244 unsigned int tid = static_cast<unsigned int>(gettid());
245 (void)g_bbox_tid_is_dealing.compare_exchange_strong(expect, tid);
246
247 FFRT_BBOX_LOG("<<<=== ffrt black box(BBOX) start ===>>>");
248 SaveCurrent();
249 SaveTaskCounter();
250 SaveWorkerStatus();
251 SaveReadyQueueStatus();
252 SaveTaskStatus();
253 FFRT_BBOX_LOG("<<<=== ffrt black box(BBOX) finish ===>>>");
254
255 std::unique_lock handle_end_lk(bbox_handle_lock);
256 bbox_handle_end.notify_one();
257
258 std::lock_guard lk(g_bbox_mtx);
259 g_bbox_tid_is_dealing.store(0);
260 g_bbox_cv.notify_all();
261 }).detach();
262
263 {
264 std::unique_lock lk(bbox_handle_lock);
265 (void)bbox_handle_end.wait_for(lk, std::chrono::seconds(5));
266 }
267 } else {
268 unsigned int tid = static_cast<unsigned int>(gettid());
269 if (tid == g_bbox_tid_is_dealing.load()) {
270 FFRT_BBOX_LOG("thread %u black box save failed", tid);
271 g_bbox_tid_is_dealing.store(0);
272 g_bbox_cv.notify_all();
273 } else {
274 FFRT_BBOX_LOG("thread %u trigger signal again, when thread %u is saving black box",
275 tid, g_bbox_tid_is_dealing.load());
276 BboxFreeze(); // hold other thread's signal resend
277 }
278 }
279 }
280
ResendSignal(siginfo_t * info)281 static void ResendSignal(siginfo_t* info)
282 {
283 int rc = syscall(SYS_rt_tgsigqueueinfo, getpid(), syscall(SYS_gettid), info->si_signo, info);
284 if (rc != 0) {
285 FFRT_BBOX_LOG("ffrt failed to resend signal during crash");
286 }
287 }
288
// Maps info->si_signo to the symbolic name of the signal; returns "?" for any
// signal number that is not in the table.
static const char* GetSigName(const siginfo_t* info)
{
    struct SigName {
        int signo;
        const char* name;
    };
    static constexpr SigName kSigNames[] = {
        { SIGABRT, "SIGABRT" },
        { SIGBUS, "SIGBUS" },
        { SIGFPE, "SIGFPE" },
        { SIGILL, "SIGILL" },
        { SIGSEGV, "SIGSEGV" },
        { SIGSTKFLT, "SIGSTKFLT" },
        { SIGSTOP, "SIGSTOP" },
        { SIGSYS, "SIGSYS" },
        { SIGTRAP, "SIGTRAP" },
    };
    for (const SigName& entry : kSigNames) {
        if (entry.signo == info->si_signo) {
            return entry.name;
        }
    }
    return "?";
}
304
SignalHandler(int signo,siginfo_t * info,void * context)305 static void SignalHandler(int signo, siginfo_t* info, void* context __attribute__((unused)))
306 {
307 g_cur_task = ExecuteCtx::Cur()->task;
308 if (FFRTIsWork()) {
309 SaveTheBbox();
310 }
311 // we need to deregister our signal handler for that signal before continuing.
312 sigaction(signo, &s_oldSa[signo], nullptr);
313 ResendSignal(info);
314 }
315
SignalReg(int signo)316 static void SignalReg(int signo)
317 {
318 sigaction(signo, nullptr, &s_oldSa[signo]);
319 struct sigaction newAction;
320 newAction.sa_flags = SA_RESTART | SA_SIGINFO;
321 newAction.sa_sigaction = SignalHandler;
322 sigaction(signo, &newAction, nullptr);
323 }
324
BBoxInit()325 __attribute__((constructor)) static void BBoxInit()
326 {
327 SignalReg(SIGABRT);
328 SignalReg(SIGBUS);
329 SignalReg(SIGFPE);
330 SignalReg(SIGILL);
331 SignalReg(SIGSEGV);
332 SignalReg(SIGSTKFLT);
333 SignalReg(SIGSYS);
334 SignalReg(SIGTRAP);
335 SignalReg(SIGINT);
336 SignalReg(SIGKILL);
337 }
338
339 #ifdef FFRT_CO_BACKTRACE_OH_ENABLE
SaveTaskCounterInfo(void)340 std::string SaveTaskCounterInfo(void)
341 {
342 std::ostringstream ss;
343 ss << "<<<=== task counter ===>>>" << std::endl;
344 ss << "FFRT BBOX TaskSubmitCounter:" << g_taskSubmitCounter.load() << " TaskEnQueueCounter:"
345 << g_taskEnQueueCounter.load() << " TaskDoneCounter:" << g_taskDoneCounter.load() << std::endl;
346
347 ss << "FFRT BBOX TaskRunCounter:" << g_taskRunCounter.load() << " TaskSwitchCounter:"
348 << g_taskSwitchCounter.load() << " TaskFinishCounter:" << g_taskFinishCounter.load() << std::endl;
349
350 if (g_taskSwitchCounter.load() + g_taskFinishCounter.load() == g_taskRunCounter.load()) {
351 ss << "TaskRunCounter equals TaskSwitchCounter + TaskFinishCounter" << std::endl;
352 } else {
353 ss << "TaskRunCounter is not equal to TaskSwitchCounter + TaskFinishCounter" << std::endl;
354 }
355 return ss.str();
356 }
357
SaveWorkerStatusInfo(void)358 std::string SaveWorkerStatusInfo(void)
359 {
360 std::ostringstream ss;
361 WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
362 ss << "<<<=== worker status ===>>>" << std::endl;
363 ffrt::QoS _qos = ffrt::QoS(static_cast<int>(qos_max));
364 for (int i = 0; i < _qos() + 1; i++) {
365 std::shared_lock<std::shared_mutex> lck(workerGroup[i].tgMutex);
366 for (auto& thread : workerGroup[i].threads) {
367 CPUEUTask* t = thread.first->curTask;
368 if (t == nullptr) {
369 ss << "qos " << i << ": worker tid " << thread.first->Id()
370 << " is running nothing" << std::endl;
371 continue;
372 }
373 if (t->type == 0) {
374 ss << "qos " << i << ": worker tid " << thread.first->Id()
375 << " is running task id " << t->gid << " name " << t->label.c_str() << std::endl;
376 }
377 }
378 }
379 return ss.str();
380 }
381
SaveReadyQueueStatusInfo()382 std::string SaveReadyQueueStatusInfo()
383 {
384 std::ostringstream ss;
385 ss << "<<<=== ready queue status ===>>>" << std::endl;
386 ffrt::QoS _qos = ffrt::QoS(static_cast<int>(qos_max));
387 for (int i = 0; i < _qos() + 1; i++) {
388 int nt = FFRTScheduler::Instance()->GetScheduler(QoS(i)).RQSize();
389 if (!nt) {
390 continue;
391 }
392
393 for (int j = 0; j < nt; j++) {
394 CPUEUTask* t = FFRTScheduler::Instance()->GetScheduler(QoS(i)).PickNextTask();
395 if (t == nullptr) {
396 ss << "qos " << (i + 1) << ": ready queue task <" << j << "/" << nt << ">"
397 << " null" << std::endl;
398 continue;
399 }
400 if (t->type == 0) {
401 ss << "qos " << (i + 1) << ": ready queue task <" << j << "/" << nt << "> id "
402 << t->gid << " name " << t->label.c_str() << std::endl;
403 }
404 }
405 }
406 return ss.str();
407 }
408
SaveTaskStatusInfo(void)409 std::string SaveTaskStatusInfo(void)
410 {
411 std::string ffrtStackInfo;
412 std::ostringstream ss;
413 auto unfree = SimpleAllocator<CPUEUTask>::getUnfreedMem();
414 auto apply = [&](const char* tag, const std::function<bool(CPUEUTask*)>& filter) {
415 decltype(unfree) tmp;
416 for (auto t : unfree) {
417 if (filter(t)) {
418 tmp.emplace_back(t);
419 }
420 }
421
422 if (tmp.size() > 0) {
423 ss << "<<<=== " << tag << "===>>>" << std::endl;
424 ffrtStackInfo += ss.str();
425 }
426 size_t idx = 1;
427 for (auto t : tmp) {
428 ss.str("");
429 if (t->type == 0) {
430 ss << "<" << idx++ << "/" << tmp.size() << ">" << "id" << t->gid << "qos"
431 << t->qos() << "name" << t->label.c_str() << std::endl;
432 }
433 ffrtStackInfo += ss.str();
434 if (t->coRoutine && (t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH))) {
435 std::string dumpInfo;
436 CPUEUTask::DumpTask(t, dumpInfo, 1);
437 ffrtStackInfo += dumpInfo;
438 }
439 }
440 };
441
442 apply("blocked by synchronization primitive(mutex etc)", [](CPUEUTask* t) {
443 return (t->state == TaskState::RUNNING) && t->coRoutine &&
444 t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH);
445 });
446 apply("blocked by task dependence", [](CPUEUTask* t) {
447 return t->state == TaskState::BLOCKED;
448 });
449 apply("pending task", [](CPUEUTask* t) {
450 return t->state == TaskState::PENDING;
451 });
452
453 return ffrtStackInfo;
454 }
455 #endif
456 #endif /* FFRT_BBOX_ENABLE */