• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #ifdef FFRT_BBOX_ENABLE
16 
17 #include "bbox.h"
18 #include <sys/syscall.h>
19 #include <unistd.h>
20 #include <csignal>
21 #include <cstdlib>
22 #include <string>
23 #include <sstream>
24 #include "dfx/log/ffrt_log_api.h"
25 #include "sched/scheduler.h"
26 
27 using namespace ffrt;
28 
29 static std::atomic<unsigned int> g_taskSubmitCounter(0);
30 static std::atomic<unsigned int> g_taskDoneCounter(0);
31 static std::atomic<unsigned int> g_taskEnQueueCounter(0);
32 static std::atomic<unsigned int> g_taskRunCounter(0);
33 static std::atomic<unsigned int> g_taskSwitchCounter(0);
34 static std::atomic<unsigned int> g_taskFinishCounter(0);
35 #ifdef FFRT_IO_TASK_SCHEDULER
36 static std::atomic<unsigned int> g_taskPendingCounter(0);
37 static std::atomic<unsigned int> g_taskWakeCounter(0);
38 #endif
39 static CPUEUTask* g_cur_task;
40 std::mutex bbox_handle_lock;
41 std::condition_variable bbox_handle_end;
42 
43 static struct sigaction s_oldSa[SIGSYS + 1]; // SIGSYS = 31
44 
TaskSubmitCounterInc(void)45 void TaskSubmitCounterInc(void)
46 {
47     ++g_taskSubmitCounter;
48 }
49 
50 #ifdef FFRT_IO_TASK_SCHEDULER
TaskWakeCounterInc(void)51 void TaskWakeCounterInc(void)
52 {
53     ++g_taskWakeCounter;
54 }
55 #endif
56 
TaskDoneCounterInc(void)57 void TaskDoneCounterInc(void)
58 {
59     ++g_taskDoneCounter;
60 }
61 
TaskEnQueuCounterInc(void)62 void TaskEnQueuCounterInc(void)
63 {
64     ++g_taskEnQueueCounter;
65 }
66 
TaskRunCounterInc(void)67 void TaskRunCounterInc(void)
68 {
69     ++g_taskRunCounter;
70 }
71 
TaskSwitchCounterInc(void)72 void TaskSwitchCounterInc(void)
73 {
74     ++g_taskSwitchCounter;
75 }
76 
TaskFinishCounterInc(void)77 void TaskFinishCounterInc(void)
78 {
79     ++g_taskFinishCounter;
80 }
81 
82 #ifdef FFRT_IO_TASK_SCHEDULER
TaskPendingCounterInc(void)83 void TaskPendingCounterInc(void)
84 {
85     ++g_taskPendingCounter;
86 }
87 #endif
88 
SaveCurrent()89 static inline void SaveCurrent()
90 {
91     FFRT_BBOX_LOG("<<<=== current status ===>>>");
92     auto t = g_cur_task;
93     if (t) {
94         if (t->type == 0) {
95             FFRT_BBOX_LOG("current: thread id %u, task id %lu, qos %d, name %s", gettid(),
96                 t->gid, t->qos(), t->label.c_str());
97         }
98     }
99 }
100 
SaveTaskCounter()101 static inline void SaveTaskCounter()
102 {
103     FFRT_BBOX_LOG("<<<=== task counter ===>>>");
104     FFRT_BBOX_LOG("FFRT BBOX TaskSubmitCounter:%u TaskEnQueueCounter:%u TaskDoneCounter:%u",
105         g_taskSubmitCounter.load(), g_taskEnQueueCounter.load(), g_taskDoneCounter.load());
106     FFRT_BBOX_LOG("FFRT BBOX TaskRunCounter:%u TaskSwitchCounter:%u TaskFinishCounter:%u", g_taskRunCounter.load(),
107         g_taskSwitchCounter.load(), g_taskFinishCounter.load());
108 #ifdef FFRT_IO_TASK_SCHEDULER
109     FFRT_BBOX_LOG("FFRT BBOX TaskWakeCounterInc:%u, TaskPendingCounter:%u",
110         g_taskWakeCounter.load(), g_taskPendingCounter.load());
111 #endif
112     if (g_taskSwitchCounter.load() + g_taskFinishCounter.load() == g_taskRunCounter.load()) {
113         FFRT_BBOX_LOG("TaskRunCounter equals TaskSwitchCounter + TaskFinishCounter");
114     } else {
115         FFRT_BBOX_LOG("TaskRunCounter is not equal to TaskSwitchCounter + TaskFinishCounter");
116     }
117 }
118 
SaveWorkerStatus()119 static inline void SaveWorkerStatus()
120 {
121     WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
122     FFRT_BBOX_LOG("<<<=== worker status ===>>>");
123     ffrt::QoS _qos = ffrt::QoS(static_cast<int>(qos_max));
124     for (int i = 0; i < _qos() + 1; i++) {
125         std::shared_lock<std::shared_mutex> lck(workerGroup[i].tgMutex);
126         for (auto& thread : workerGroup[i].threads) {
127             CPUEUTask* t = thread.first->curTask;
128             if (t == nullptr) {
129                 FFRT_BBOX_LOG("qos %d: worker tid %d is running nothing", i, thread.first->Id());
130                 continue;
131             }
132             if (t->type == 0) {
133                 FFRT_BBOX_LOG("qos %d: worker tid %d is running task id %lu name %s", i, thread.first->Id(),
134                     t->gid, t->label.c_str());
135             }
136         }
137     }
138 }
139 
SaveReadyQueueStatus()140 static inline void SaveReadyQueueStatus()
141 {
142     FFRT_BBOX_LOG("<<<=== ready queue status ===>>>");
143     ffrt::QoS _qos = ffrt::QoS(static_cast<int>(qos_max));
144     for (int i = 0; i < _qos() + 1; i++) {
145         int nt = FFRTScheduler::Instance()->GetScheduler(QoS(i)).RQSize();
146         if (!nt) {
147             continue;
148         }
149 
150         for (int j = 0; j < nt; j++) {
151             CPUEUTask* t = FFRTScheduler::Instance()->GetScheduler(QoS(i)).PickNextTask();
152             if (t == nullptr) {
153                 FFRT_BBOX_LOG("qos %d: ready queue task <%d/%d> null", i + 1, j, nt);
154                 continue;
155             }
156             if (t->type == 0) {
157                 FFRT_BBOX_LOG("qos %d: ready queue task <%d/%d> id %lu name %s",
158                     i + 1, j, nt, t->gid, t->label.c_str());
159             }
160         }
161     }
162 }
163 
SaveTaskStatus()164 static inline void SaveTaskStatus()
165 {
166     auto unfree = SimpleAllocator<CPUEUTask>::getUnfreedMem();
167     auto apply = [&](const char* tag, const std::function<bool(CPUEUTask*)>& filter) {
168         decltype(unfree) tmp;
169         for (auto t : unfree) {
170             if (filter(t)) {
171                 tmp.emplace_back(t);
172             }
173         }
174 
175         if (tmp.size() > 0) {
176             FFRT_BBOX_LOG("<<<=== %s ===>>>", tag);
177         }
178         size_t idx = 1;
179         for (auto t : tmp) {
180             if (t->type == 0) {
181                 FFRT_BBOX_LOG("<%zu/%lu> id %lu qos %d name %s", idx++,
182                     tmp.size(), t->gid, t->qos(), t->label.c_str());
183             }
184             if (t->coRoutine && (t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH))
185                 && t != g_cur_task) {
186                 CoStart(t);
187             }
188         }
189     };
190 
191     apply("blocked by synchronization primitive(mutex etc)", [](CPUEUTask* t) {
192         return (t->state == TaskState::RUNNING) && t->coRoutine &&
193             t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH) && t != g_cur_task;
194     });
195     apply("blocked by task dependence", [](CPUEUTask* t) {
196         return t->state == TaskState::BLOCKED;
197     });
198     apply("pending task", [](CPUEUTask* t) {
199         return t->state == TaskState::PENDING;
200     });
201 }
202 
// Thread id of the thread currently writing the black box; 0 when no dump is in progress.
static std::atomic<unsigned int> g_bbox_tid_is_dealing {0};
// Number of SaveTheBbox() calls so far; only the first call performs the dump.
static std::atomic<unsigned int> g_bbox_called_times {0};
// Condition/mutex pair BboxFreeze() blocks on until g_bbox_tid_is_dealing is 0.
static std::condition_variable g_bbox_cv;
static std::mutex g_bbox_mtx;
207 
BboxFreeze()208 void BboxFreeze()
209 {
210     std::unique_lock<std::mutex> lk(g_bbox_mtx);
211     g_bbox_cv.wait(lk, [] { return g_bbox_tid_is_dealing.load() == 0; });
212 }
213 
backtrace(int ignoreDepth)214 void backtrace(int ignoreDepth)
215 {
216     FFRT_BBOX_LOG("backtrace");
217 #ifdef FFRT_CO_BACKTRACE_OH_ENABLE
218     std::string dumpInfo;
219     CPUEUTask::DumpTask(nullptr, dumpInfo);
220 #endif
221 }
222 
GetBboxEnableState(void)223 unsigned int GetBboxEnableState(void)
224 {
225     return g_bbox_tid_is_dealing.load();
226 }
227 
FFRTIsWork()228 bool FFRTIsWork()
229 {
230     if (g_taskSubmitCounter.load() == 0) {
231         return false;
232     } else if (g_taskSubmitCounter.load() == g_taskDoneCounter.load()) {
233         return false;
234     }
235 
236     return true;
237 }
238 
SaveTheBbox()239 void SaveTheBbox()
240 {
241     if (g_bbox_called_times.fetch_add(1) == 0) { // only save once
242         std::thread([&]() {
243             unsigned int expect = 0;
244             unsigned int tid = static_cast<unsigned int>(gettid());
245             (void)g_bbox_tid_is_dealing.compare_exchange_strong(expect, tid);
246 
247             FFRT_BBOX_LOG("<<<=== ffrt black box(BBOX) start ===>>>");
248             SaveCurrent();
249             SaveTaskCounter();
250             SaveWorkerStatus();
251             SaveReadyQueueStatus();
252             SaveTaskStatus();
253             FFRT_BBOX_LOG("<<<=== ffrt black box(BBOX) finish ===>>>");
254 
255             std::unique_lock handle_end_lk(bbox_handle_lock);
256             bbox_handle_end.notify_one();
257 
258             std::lock_guard lk(g_bbox_mtx);
259             g_bbox_tid_is_dealing.store(0);
260             g_bbox_cv.notify_all();
261         }).detach();
262 
263         {
264             std::unique_lock lk(bbox_handle_lock);
265             (void)bbox_handle_end.wait_for(lk, std::chrono::seconds(5));
266         }
267     } else {
268         unsigned int tid = static_cast<unsigned int>(gettid());
269         if (tid == g_bbox_tid_is_dealing.load()) {
270             FFRT_BBOX_LOG("thread %u black box save failed", tid);
271             g_bbox_tid_is_dealing.store(0);
272             g_bbox_cv.notify_all();
273         } else {
274             FFRT_BBOX_LOG("thread %u trigger signal again, when thread %u is saving black box",
275                 tid, g_bbox_tid_is_dealing.load());
276             BboxFreeze(); // hold other thread's signal resend
277         }
278     }
279 }
280 
ResendSignal(siginfo_t * info)281 static void ResendSignal(siginfo_t* info)
282 {
283     int rc = syscall(SYS_rt_tgsigqueueinfo, getpid(), syscall(SYS_gettid), info->si_signo, info);
284     if (rc != 0) {
285         FFRT_BBOX_LOG("ffrt failed to resend signal during crash");
286     }
287 }
288 
// Maps the signal number in `info` to its symbolic name.
// Returns "?" for any signal not listed below. `info` must not be null.
static const char* GetSigName(const siginfo_t* info)
{
    switch (info->si_signo) {
        case SIGABRT: return "SIGABRT";
        case SIGBUS: return "SIGBUS";
        case SIGFPE: return "SIGFPE";
        case SIGILL: return "SIGILL";
        case SIGINT: return "SIGINT"; // registered in BBoxInit, so it needs a name as well
        case SIGSEGV: return "SIGSEGV";
        case SIGSTKFLT: return "SIGSTKFLT";
        case SIGSTOP: return "SIGSTOP";
        case SIGSYS: return "SIGSYS";
        case SIGTRAP: return "SIGTRAP";
        default: return "?";
    }
}
304 
SignalHandler(int signo,siginfo_t * info,void * context)305 static void SignalHandler(int signo, siginfo_t* info, void* context __attribute__((unused)))
306 {
307     g_cur_task = ExecuteCtx::Cur()->task;
308     if (FFRTIsWork()) {
309         SaveTheBbox();
310     }
311     // we need to deregister our signal handler for that signal before continuing.
312     sigaction(signo, &s_oldSa[signo], nullptr);
313     ResendSignal(info);
314 }
315 
SignalReg(int signo)316 static void SignalReg(int signo)
317 {
318     sigaction(signo, nullptr, &s_oldSa[signo]);
319     struct sigaction newAction;
320     newAction.sa_flags = SA_RESTART | SA_SIGINFO;
321     newAction.sa_sigaction = SignalHandler;
322     sigaction(signo, &newAction, nullptr);
323 }
324 
BBoxInit()325 __attribute__((constructor)) static void BBoxInit()
326 {
327     SignalReg(SIGABRT);
328     SignalReg(SIGBUS);
329     SignalReg(SIGFPE);
330     SignalReg(SIGILL);
331     SignalReg(SIGSEGV);
332     SignalReg(SIGSTKFLT);
333     SignalReg(SIGSYS);
334     SignalReg(SIGTRAP);
335     SignalReg(SIGINT);
336     SignalReg(SIGKILL);
337 }
338 
339 #ifdef FFRT_CO_BACKTRACE_OH_ENABLE
SaveTaskCounterInfo(void)340 std::string SaveTaskCounterInfo(void)
341 {
342     std::ostringstream ss;
343     ss << "<<<=== task counter ===>>>" << std::endl;
344     ss << "FFRT BBOX TaskSubmitCounter:" << g_taskSubmitCounter.load() << " TaskEnQueueCounter:"
345        << g_taskEnQueueCounter.load() << " TaskDoneCounter:" << g_taskDoneCounter.load() << std::endl;
346 
347     ss << "FFRT BBOX TaskRunCounter:" << g_taskRunCounter.load() << " TaskSwitchCounter:"
348        << g_taskSwitchCounter.load() << " TaskFinishCounter:" << g_taskFinishCounter.load() << std::endl;
349 
350     if (g_taskSwitchCounter.load() + g_taskFinishCounter.load() == g_taskRunCounter.load()) {
351         ss << "TaskRunCounter equals TaskSwitchCounter + TaskFinishCounter" << std::endl;
352     } else {
353         ss << "TaskRunCounter is not equal to TaskSwitchCounter + TaskFinishCounter" << std::endl;
354     }
355     return ss.str();
356 }
357 
SaveWorkerStatusInfo(void)358 std::string SaveWorkerStatusInfo(void)
359 {
360     std::ostringstream ss;
361     WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
362     ss << "<<<=== worker status ===>>>" << std::endl;
363     ffrt::QoS _qos = ffrt::QoS(static_cast<int>(qos_max));
364     for (int i = 0; i < _qos() + 1; i++) {
365         std::shared_lock<std::shared_mutex> lck(workerGroup[i].tgMutex);
366         for (auto& thread : workerGroup[i].threads) {
367             CPUEUTask* t = thread.first->curTask;
368             if (t == nullptr) {
369                 ss << "qos " << i << ": worker tid " << thread.first->Id()
370                    << " is running nothing" << std::endl;
371                 continue;
372             }
373             if (t->type == 0) {
374                 ss << "qos " << i << ": worker tid " << thread.first->Id()
375                 << " is running task id " << t->gid << " name " << t->label.c_str() << std::endl;
376             }
377         }
378     }
379     return ss.str();
380 }
381 
SaveReadyQueueStatusInfo()382 std::string SaveReadyQueueStatusInfo()
383 {
384     std::ostringstream ss;
385     ss << "<<<=== ready queue status ===>>>" << std::endl;
386     ffrt::QoS _qos = ffrt::QoS(static_cast<int>(qos_max));
387     for (int i = 0; i < _qos() + 1; i++) {
388         int nt = FFRTScheduler::Instance()->GetScheduler(QoS(i)).RQSize();
389         if (!nt) {
390             continue;
391         }
392 
393         for (int j = 0; j < nt; j++) {
394             CPUEUTask* t = FFRTScheduler::Instance()->GetScheduler(QoS(i)).PickNextTask();
395             if (t == nullptr) {
396                 ss << "qos " << (i + 1) << ": ready queue task <" << j << "/" << nt << ">"
397                    << " null" << std::endl;
398                 continue;
399             }
400             if (t->type == 0) {
401                 ss << "qos " << (i + 1) << ": ready queue task <" << j << "/" << nt << "> id "
402                 << t->gid << " name " << t->label.c_str() << std::endl;
403             }
404         }
405     }
406     return ss.str();
407 }
408 
SaveTaskStatusInfo(void)409 std::string SaveTaskStatusInfo(void)
410 {
411     std::string ffrtStackInfo;
412     std::ostringstream ss;
413     auto unfree = SimpleAllocator<CPUEUTask>::getUnfreedMem();
414     auto apply = [&](const char* tag, const std::function<bool(CPUEUTask*)>& filter) {
415         decltype(unfree) tmp;
416         for (auto t : unfree) {
417             if (filter(t)) {
418                 tmp.emplace_back(t);
419             }
420         }
421 
422         if (tmp.size() > 0) {
423             ss << "<<<=== " << tag << "===>>>" << std::endl;
424             ffrtStackInfo += ss.str();
425         }
426         size_t idx = 1;
427         for (auto t : tmp) {
428             ss.str("");
429             if (t->type == 0) {
430                 ss << "<" << idx++ << "/" << tmp.size() << ">" << "id" << t->gid << "qos"
431                 << t->qos() << "name" << t->label.c_str() << std::endl;
432             }
433             ffrtStackInfo += ss.str();
434             if (t->coRoutine && (t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH))) {
435                 std::string dumpInfo;
436                 CPUEUTask::DumpTask(t, dumpInfo, 1);
437                 ffrtStackInfo += dumpInfo;
438             }
439         }
440     };
441 
442     apply("blocked by synchronization primitive(mutex etc)", [](CPUEUTask* t) {
443         return (t->state == TaskState::RUNNING) && t->coRoutine &&
444             t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH);
445     });
446     apply("blocked by task dependence", [](CPUEUTask* t) {
447         return t->state == TaskState::BLOCKED;
448     });
449     apply("pending task", [](CPUEUTask* t) {
450         return t->state == TaskState::PENDING;
451     });
452 
453     return ffrtStackInfo;
454 }
455 #endif
456 #endif /* FFRT_BBOX_ENABLE */