• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #ifdef FFRT_BBOX_ENABLE
16 
17 #include "bbox.h"
18 #include <sys/syscall.h>
19 #include <unistd.h>
20 #include <csignal>
21 #include <cstdlib>
22 #include <string>
23 #include <sstream>
24 #ifdef FFRT_CO_BACKTRACE_ENABLE
25 #include <utils/CallStack.h>
26 #include "core/task_ctx.h"
27 #endif
28 #include "dfx/log/ffrt_log_api.h"
29 #include "sched/scheduler.h"
30 
// Event counters for the black-box statistics. Each one is bumped by the
// matching Task*CounterInc() hook and printed by the task-counter dump.
static std::atomic<unsigned int> g_taskSubmitCounter {0};
static std::atomic<unsigned int> g_taskDoneCounter {0};
static std::atomic<unsigned int> g_taskEnQueueCounter {0};
static std::atomic<unsigned int> g_taskRunCounter {0};
static std::atomic<unsigned int> g_taskSwitchCounter {0};
static std::atomic<unsigned int> g_taskFinishCounter {0};

// Signal dispositions that were in place before ours, indexed by signal number.
static struct sigaction s_oldSa[SIGSYS + 1]; // SIGSYS = 31
39 
40 using namespace ffrt;
41 
TaskSubmitCounterInc(void)42 void TaskSubmitCounterInc(void)
43 {
44     ++g_taskSubmitCounter;
45 }
46 
TaskDoneCounterInc(void)47 void TaskDoneCounterInc(void)
48 {
49     ++g_taskDoneCounter;
50 }
51 
TaskEnQueuCounterInc(void)52 void TaskEnQueuCounterInc(void)
53 {
54     ++g_taskEnQueueCounter;
55 }
56 
TaskRunCounterInc(void)57 void TaskRunCounterInc(void)
58 {
59     ++g_taskRunCounter;
60 }
61 
TaskSwitchCounterInc(void)62 void TaskSwitchCounterInc(void)
63 {
64     ++g_taskSwitchCounter;
65 }
66 
TaskFinishCounterInc(void)67 void TaskFinishCounterInc(void)
68 {
69     ++g_taskFinishCounter;
70 }
71 
SaveCurrent()72 static inline void SaveCurrent()
73 {
74     FFRT_BBOX_LOG("<<<=== current status ===>>>");
75     auto t = ExecuteCtx::Cur()->task;
76     if (t) {
77         FFRT_BBOX_LOG("current: thread id %u, task id %lu, qos %d, name %s", gettid(),
78             t->gid, t->qos(), t->label.c_str());
79     }
80 
81     const int IGNORE_DEPTH = 3;
82     backtrace(IGNORE_DEPTH);
83 }
84 
SaveTaskCounter()85 static inline void SaveTaskCounter()
86 {
87     FFRT_BBOX_LOG("<<<=== task counter ===>>>");
88     FFRT_BBOX_LOG("FFRT BBOX TaskSubmitCounter:%u TaskEnQueueCounter:%u TaskDoneCounter:%u",
89         g_taskSubmitCounter.load(), g_taskEnQueueCounter.load(), g_taskDoneCounter.load());
90     FFRT_BBOX_LOG("FFRT BBOX TaskRunCounter:%u TaskSwitchCounter:%u TaskFinishCounter:%u", g_taskRunCounter.load(),
91         g_taskSwitchCounter.load(), g_taskFinishCounter.load());
92     if (g_taskSwitchCounter.load() + g_taskFinishCounter.load() == g_taskRunCounter.load()) {
93         FFRT_BBOX_LOG("TaskRunCounter equals TaskSwitchCounter + TaskFinishCounter");
94     } else {
95         FFRT_BBOX_LOG("TaskRunCounter is not equal to TaskSwitchCounter + TaskFinishCounter");
96     }
97 }
98 
SaveWorkerStatus()99 static inline void SaveWorkerStatus()
100 {
101     WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
102     FFRT_BBOX_LOG("<<<=== worker status ===>>>");
103     for (int i = 0; i < static_cast<int>(qos_max) + 1; i++) {
104         std::unique_lock lock(workerGroup[i].tgMutex);
105         for (auto& thread : workerGroup[i].threads) {
106             TaskCtx* t = thread.first->curTask;
107             if (t == nullptr) {
108                 FFRT_BBOX_LOG("qos %d: worker tid %d is running nothing", i, thread.first->Id());
109                 continue;
110             }
111             FFRT_BBOX_LOG("qos %d: worker tid %d is running task id %lu name %s", i, thread.first->Id(),
112                 t->gid, t->label.c_str());
113         }
114     }
115 }
116 
SaveReadyQueueStatus()117 static inline void SaveReadyQueueStatus()
118 {
119     FFRT_BBOX_LOG("<<<=== ready queue status ===>>>");
120     for (int i = 0; i < static_cast<int>(qos_max) + 1; i++) {
121         int nt = FFRTScheduler::Instance()->GetScheduler(QoS(i)).RQSize();
122         if (!nt) {
123             continue;
124         }
125 
126         for (int j = 0; j < nt; j++) {
127             TaskCtx* t = FFRTScheduler::Instance()->GetScheduler(QoS(i)).PickNextTask();
128             if (t == nullptr) {
129                 FFRT_BBOX_LOG("qos %d: ready queue task <%d/%d> null", i + 1, j, nt);
130                 continue;
131             }
132             FFRT_BBOX_LOG("qos %d: ready queue task <%d/%d> id %lu name %s",
133                 i + 1, j, nt, t->gid, t->label.c_str());
134         }
135     }
136 }
137 
SaveTaskStatus()138 static inline void SaveTaskStatus()
139 {
140     auto unfree = SimpleAllocator<TaskCtx>::getUnfreedMem();
141     auto apply = [&](const char* tag, const std::function<bool(TaskCtx*)>& filter) {
142         decltype(unfree) tmp;
143         for (auto t : unfree) {
144             if (filter(t)) {
145                 tmp.emplace_back(t);
146             }
147         }
148 
149         if (tmp.size() > 0) {
150             FFRT_BBOX_LOG("<<<=== %s ===>>>", tag);
151         }
152         size_t idx = 1;
153         for (auto t : tmp) {
154             FFRT_BBOX_LOG("<%zu/%lu> id %lu qos %d name %s", idx++,
155                 tmp.size(), t->gid, t->qos(), t->label.c_str());
156             if (t->coRoutine && (t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH))) {
157                 CoStart(t);
158             }
159         }
160     };
161 
162     apply("blocked by synchronization primitive(mutex etc)", [](TaskCtx* t) {
163         return (t->state == TaskState::RUNNING) && t->coRoutine &&
164             t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH);
165     });
166     apply("blocked by task dependence", [](TaskCtx* t) {
167         return t->state == TaskState::BLOCKED;
168     });
169     apply("pending task", [](TaskCtx* t) {
170         return t->state == TaskState::PENDING;
171     });
172 }
173 
// Thread id of the thread currently saving the black box; 0 when nobody is.
static std::atomic<unsigned int> g_bbox_tid_is_dealing {0};
// Number of times a save has been entered; only the first one dumps.
static std::atomic<unsigned int> g_bbox_called_times {0};
// Used together to park threads in BboxFreeze() until the save is over.
static std::condition_variable g_bbox_cv;
static std::mutex g_bbox_mtx;
178 
BboxFreeze()179 void BboxFreeze()
180 {
181     std::unique_lock<std::mutex> lk(g_bbox_mtx);
182     g_bbox_cv.wait(lk, [] { return g_bbox_tid_is_dealing.load() == 0; });
183 }
184 
backtrace(int ignoreDepth)185 void backtrace(int ignoreDepth)
186 {
187     FFRT_BBOX_LOG("backtrace");
188 
189 #ifdef FFRT_CO_BACKTRACE_OH_ENABLE
190     std::string dumpInfo;
191     TaskCtx::DumpTask(nullptr, dumpInfo);
192 #endif
193 }
194 
GetBboxEnableState(void)195 unsigned int GetBboxEnableState(void)
196 {
197     return g_bbox_tid_is_dealing.load();
198 }
199 
FFRTIsWork()200 bool FFRTIsWork()
201 {
202     if (g_taskSubmitCounter.load() == 0) {
203         return false;
204     } else if (g_taskSubmitCounter.load() == g_taskDoneCounter.load()) {
205         FFRT_BBOX_LOG("ffrt already finished, TaskSubmitCounter:%u, TaskDoneCounter:%u",
206             g_taskSubmitCounter.load(), g_taskDoneCounter.load());
207         return false;
208     }
209 
210     return true;
211 }
212 
SaveTheBbox()213 void SaveTheBbox()
214 {
215     unsigned int expect = 0;
216     unsigned int tid = static_cast<unsigned int>(gettid());
217     if (!g_bbox_tid_is_dealing.compare_exchange_strong(expect, tid)) {
218         if (tid == g_bbox_tid_is_dealing.load()) {
219             FFRT_BBOX_LOG("thread %u black box save failed", tid);
220             g_bbox_tid_is_dealing.store(0);
221             g_bbox_cv.notify_all();
222         } else {
223             FFRT_BBOX_LOG("thread %u trigger signal again, when thread %u is saving black box",
224                 tid, g_bbox_tid_is_dealing.load());
225             BboxFreeze(); // hold other thread's signal resend
226         }
227         return;
228     }
229 
230     if (g_bbox_called_times.fetch_add(1) == 0) { // only save once
231         FFRT_BBOX_LOG("<<<=== ffrt black box(BBOX) start ===>>>");
232         SaveCurrent();
233         SaveTaskCounter();
234         SaveWorkerStatus();
235         SaveReadyQueueStatus();
236         SaveTaskStatus();
237         FFRT_BBOX_LOG("<<<=== ffrt black box(BBOX) finish ===>>>");
238     }
239 
240     std::lock_guard lk(g_bbox_mtx);
241     g_bbox_tid_is_dealing.store(0);
242     g_bbox_cv.notify_all();
243 }
244 
ResendSignal(siginfo_t * info)245 static void ResendSignal(siginfo_t* info)
246 {
247     int rc = syscall(SYS_rt_tgsigqueueinfo, getpid(), syscall(SYS_gettid), info->si_signo, info);
248     if (rc != 0) {
249         FFRT_BBOX_LOG("ffrt failed to resend signal during crash");
250     }
251 }
252 
// Maps info->si_signo to its symbolic name for logging.
// Returns "?" for any signal that is not listed.
static const char* GetSigName(const siginfo_t* info)
{
    switch (info->si_signo) {
        case SIGABRT: return "SIGABRT";
        case SIGBUS: return "SIGBUS";
        case SIGFPE: return "SIGFPE";
        case SIGILL: return "SIGILL";
        case SIGINT: return "SIGINT"; // the handler is registered for SIGINT as well
        case SIGSEGV: return "SIGSEGV";
        case SIGSTKFLT: return "SIGSTKFLT";
        case SIGSTOP: return "SIGSTOP";
        case SIGSYS: return "SIGSYS";
        case SIGTRAP: return "SIGTRAP";
        default: return "?";
    }
}
268 
SignalHandler(int signo,siginfo_t * info,void * context)269 static void SignalHandler(int signo, siginfo_t* info, void* context __attribute__((unused)))
270 {
271     if (FFRTIsWork()) {
272         FFRT_BBOX_LOG("recv signal %d (%s) code %d", signo, GetSigName(info), info->si_code);
273         SaveTheBbox();
274     }
275 
276     // we need to deregister our signal handler for that signal before continuing.
277     sigaction(signo, &s_oldSa[signo], nullptr);
278     ResendSignal(info);
279 }
280 
SignalReg(int signo)281 static void SignalReg(int signo)
282 {
283     sigaction(signo, nullptr, &s_oldSa[signo]);
284     struct sigaction newAction;
285     newAction.sa_flags = SA_RESTART | SA_SIGINFO;
286     newAction.sa_sigaction = SignalHandler;
287     sigaction(signo, &newAction, nullptr);
288 }
289 
BBoxInit()290 __attribute__((constructor)) static void BBoxInit()
291 {
292     SignalReg(SIGABRT);
293     SignalReg(SIGBUS);
294     SignalReg(SIGFPE);
295     SignalReg(SIGILL);
296     SignalReg(SIGSEGV);
297     SignalReg(SIGSTKFLT);
298     SignalReg(SIGSYS);
299     SignalReg(SIGTRAP);
300     SignalReg(SIGINT);
301     SignalReg(SIGKILL);
302 }
303 
304 #ifdef FFRT_CO_BACKTRACE_OH_ENABLE
SaveTaskCounterInfo(void)305 std::string SaveTaskCounterInfo(void)
306 {
307     std::ostringstream ss;
308     ss << "<<<=== task counter ===>>>" << std::endl;
309     ss << "FFRT BBOX TaskSubmitCounter:" << g_taskSubmitCounter.load() << " TaskEnQueueCounter:"
310        << g_taskEnQueueCounter.load() << " TaskDoneCounter:" << g_taskDoneCounter.load() << std::endl;
311     ss << "FFRT BBOX TaskRunCounter:" << g_taskRunCounter.load() << " TaskSwitchCounter:"
312        << g_taskSwitchCounter.load() << " TaskFinishCounter:" << g_taskFinishCounter.load() << std::endl;
313 
314     if (g_taskSwitchCounter.load() + g_taskFinishCounter.load() == g_taskRunCounter.load()) {
315         ss << "TaskRunCounter equals TaskSwitchCounter + TaskFinishCounter" << std::endl;
316     } else {
317         ss << "TaskRunCounter is not equal to TaskSwitchCounter + TaskFinishCounter" << std::endl;
318     }
319     return ss.str();
320 }
321 
SaveWorkerStatusInfo(void)322 std::string SaveWorkerStatusInfo(void)
323 {
324     std::ostringstream ss;
325     WorkerGroupCtl* workerGroup = ExecuteUnit::Instance().GetGroupCtl();
326     ss << "<<<=== worker status ===>>>" << std::endl;
327     for (int i = 0; i < static_cast<int>(qos_max) + 1; i++) {
328         std::unique_lock lock(workerGroup[i].tgMutex);
329         for (auto& thread : workerGroup[i].threads) {
330             TaskCtx* t = thread.first->curTask;
331             if (t == nullptr) {
332                 ss << "qos " << i << ": worker tid " << thread.first->Id()
333                    << " is running nothing" << std::endl;
334                 continue;
335             }
336             ss << "qos " << i << ": worker tid " << thread.first->Id()
337                << " is running task id " << t->gid << " name " << t->label.c_str() << std::endl;
338         }
339     }
340     return ss.str();
341 }
342 
SaveReadyQueueStatusInfo()343 std::string SaveReadyQueueStatusInfo()
344 {
345     std::ostringstream ss;
346     ss << "<<<=== ready queue status ===>>>" << std::endl;
347     for (int i = 0; i < static_cast<int>(qos_max) + 1; i++) {
348         int nt = FFRTScheduler::Instance()->GetScheduler(QoS(i)).RQSize();
349         if (!nt) {
350             continue;
351         }
352 
353         for (int j = 0; j < nt; j++) {
354             TaskCtx* t = FFRTScheduler::Instance()->GetScheduler(QoS(i)).PickNextTask();
355             if (t == nullptr) {
356                 ss << "qos " << (i + 1) << ": ready queue task <" << j << "/" << nt << ">"
357                    << " null" << std::endl;
358                 continue;
359             }
360             ss << "qos " << (i + 1) << ": ready queue task <" << j << "/" << nt << "> id "
361                << t->gid << " name " << t->label.c_str() << std::endl;
362         }
363     }
364     return ss.str();
365 }
366 
SaveTaskStatusInfo(void)367 std::string SaveTaskStatusInfo(void)
368 {
369     std::string ffrtStackInfo;
370     std::ostringstream ss;
371     auto unfree = SimpleAllocator<TaskCtx>::getUnfreedMem();
372     auto apply = [&](const char* tag, const std::function<bool(TaskCtx*)>& filter) {
373         decltype(unfree) tmp;
374         for (auto t : unfree) {
375             if (filter(t)) {
376                 tmp.emplace_back(t);
377             }
378         }
379 
380         if (tmp.size() > 0) {
381             ss << "<<<=== " << tag << "===>>>" << std::endl;
382             ffrtStackInfo += ss.str();
383         }
384         size_t idx = 1;
385         for (auto t : tmp) {
386             ss.str("");
387             ss << "<" << idx++ << "/" << tmp.size() << ">" << "id" << t->gid << "qos"
388                << t->qos() << "name" << t->label.c_str() << std::endl;
389             ffrtStackInfo += ss.str();
390             if (t->coRoutine && (t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH))) {
391                 std::string dumpInfo;
392                 TaskCtx::DumpTask(t, dumpInfo, 1);
393                 ffrtStackInfo += dumpInfo;
394             }
395         }
396     };
397 
398     apply("blocked by synchronization primitive(mutex etc)", [](TaskCtx* t) {
399         return (t->state == TaskState::RUNNING) && t->coRoutine &&
400             t->coRoutine->status.load() == static_cast<int>(CoStatus::CO_NOT_FINISH);
401     });
402     apply("blocked by task dependence", [](TaskCtx* t) {
403         return t->state == TaskState::BLOCKED;
404     });
405     apply("pending task", [](TaskCtx* t) {
406         return t->state == TaskState::PENDING;
407     });
408 
409     return ffrtStackInfo;
410 }
411 #endif
412 #endif /* FFRT_BBOX_ENABLE */