• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Igalia S.L.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "tu_cs.h"
7 
8 #include <arpa/inet.h>
9 #include <netinet/in.h>
10 #include <sys/socket.h>
11 
12 #include "tu_device.h"
13 
14 /* A simple implementation of breadcrumbs tracking of GPU progress
15  * intended to be a last resort when debugging unrecoverable hangs.
16  * For best results use Vulkan traces to have a predictable place of hang.
17  *
18  * For ordinary hangs as a more user-friendly solution use GFR
19  * "Graphics Flight Recorder".
20  *
21  * This implementation aims to handle cases where we cannot do anything
22  * after the hang, which is achieved by:
23  * - On GPU after each breadcrumb we wait until CPU acks it and sends udp
24  *    packet to the remote host;
25  * - At specified breadcrumb require explicit user input to continue
26  *   execution up to the next breadcrumb.
27  *
28  * In-driver breadcrumbs also allow more precise tracking since we could
29  * target a single GPU packet.
30  *
31  *
32  * Breadcrumbs settings:
33  *
34  *  TU_BREADCRUMBS=$IP:$PORT,break=$BREAKPOINT:$BREAKPOINT_HITS
35  * Where:
36  *  $BREAKPOINT - the breadcrumb from which we require explicit ack
37  *  $BREAKPOINT_HITS - how many times breakpoint should be reached for
38  *   break to occur. Necessary for a gmem mode and re-usable cmdbuffers
39  *   in both of which the same cmdstream could be executed several times.
40  *
41  *
42  * A typical work flow would be:
43  * - Start listening for breadcrumbs on remote host:
44  *    nc -lvup $PORT | stdbuf -o0 xxd -pc -c 4 | awk -Wposix '{printf("%u:%u\n", "0x" $0, a[$0]++)}'
45  *
46  * - Start capturing command stream:
47  *    sudo cat /sys/kernel/debug/dri/0/rd > ~/cmdstream.rd
48  *
49  * - On device replay the hanging trace with:
50  *    TU_BREADCRUMBS=$IP:$PORT,break=-1:0
51  *   ! Try to reproduce the hang in a sysmem mode because it would
52  *   require much less breadcrumb writes and syncs.
53  *
54  * - Increase hangcheck period:
55  *    echo -n 60000 > /sys/kernel/debug/dri/0/hangcheck_period_ms
56  *
57  * - After GPU hang note the last breadcrumb and relaunch trace with:
58  *    TU_BREADCRUMBS=$IP:$PORT,break=$LAST_BREADCRUMB:$HITS
59  *
60  * - After the breakpoint is reached each breadcrumb would require
61  *   explicit ack from the user. This way it's possible to find
62  *   the last packet which didn't hang.
63  *
64  * - Find the packet in the decoded cmdstream.
65  */
66 
/* State of the breadcrumbs machinery, created by tu_breadcrumbs_init()
 * and handed to the sync thread as its argument.
 */
struct breadcrumbs_context
{
   /* Destination of the UDP breadcrumb packets, parsed from the
    * "$IP:$PORT" part of TU_BREADCRUMBS.
    */
   char remote_host[64];
   int remote_port;

   /* Parsed from the "break=$BREAKPOINT:$BREAKPOINT_HITS" part of
    * TU_BREADCRUMBS: the breadcrumb from which an explicit user ack is
    * required, and how many times it has to be reached first.
    */
   uint32_t breadcrumb_breakpoint;
   uint32_t breadcrumb_breakpoint_hits;

   /* Set by tu_breadcrumbs_finish() to make the sync thread leave its
    * polling loop.
    */
   bool thread_stop;
   pthread_t breadcrumbs_thread;

   /* Device whose global BO holds the GPU/CPU sync seqnos. */
   struct tu_device *device;

   /* Atomically incremented each time a breadcrumb id is handed out. */
   uint32_t breadcrumb_idx;
};
81 
82 static void *
sync_gpu_with_cpu(void * _job)83 sync_gpu_with_cpu(void *_job)
84 {
85    struct breadcrumbs_context *ctx = (struct breadcrumbs_context *) _job;
86    struct tu6_global *global =
87       (struct tu6_global *) ctx->device->global_bo->map;
88    uint32_t last_breadcrumb = 0;
89    uint32_t breakpoint_hits = 0;
90 
91    int s = socket(AF_INET, SOCK_DGRAM, 0);
92 
93    if (s < 0) {
94       mesa_loge("TU_BREADCRUMBS: Error while creating socket");
95       return NULL;
96    }
97 
98    struct sockaddr_in to_addr;
99    to_addr.sin_family = AF_INET;
100    to_addr.sin_port = htons(ctx->remote_port);
101    to_addr.sin_addr.s_addr = inet_addr(ctx->remote_host);
102 
103    /* Run until we know that no more work would be submitted,
104     * because each breadcrumb requires an ack from cpu side and without
105     * the ack GPU would timeout.
106     */
107    while (!ctx->thread_stop) {
108       uint32_t current_breadcrumb = global->breadcrumb_gpu_sync_seqno;
109 
110       if (current_breadcrumb != last_breadcrumb) {
111          last_breadcrumb = current_breadcrumb;
112 
113          uint32_t data = htonl(last_breadcrumb);
114          if (sendto(s, &data, sizeof(data), 0, (struct sockaddr *) &to_addr,
115                     sizeof(to_addr)) < 0) {
116             mesa_loge("TU_BREADCRUMBS: sendto failed");
117             goto fail;
118          }
119 
120          if (last_breadcrumb >= ctx->breadcrumb_breakpoint &&
121              breakpoint_hits >= ctx->breadcrumb_breakpoint_hits) {
122             printf("GPU is on breadcrumb %d, continue?", last_breadcrumb);
123             while (getchar() != 'y')
124                ;
125          }
126 
127          if (ctx->breadcrumb_breakpoint == last_breadcrumb)
128             breakpoint_hits++;
129 
130          /* ack that we received the value */
131          global->breadcrumb_cpu_sync_seqno = last_breadcrumb;
132       }
133    }
134 
135 fail:
136    close(s);
137 
138    return NULL;
139 }
140 
/* Start a type-7 packet in cs: reserve room for the header plus cnt
 * payload entries and emit the header for opcode.
 *
 * Same as tu_cs_emit_pkt7 but without instrumentation.
 */
static inline void
emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
   /* One entry for the header, cnt entries for what the caller emits. */
   const unsigned reserved = cnt + 1u;

   tu_cs_reserve(cs, reserved);
   tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));
}
148 
149 void
tu_breadcrumbs_init(struct tu_device * device)150 tu_breadcrumbs_init(struct tu_device *device)
151 {
152    const char *breadcrumbs_opt = NULL;
153 #ifdef TU_BREADCRUMBS_ENABLED
154    breadcrumbs_opt = os_get_option("TU_BREADCRUMBS");
155 #endif
156 
157    device->breadcrumbs_ctx = NULL;
158    if (!breadcrumbs_opt) {
159       return;
160    }
161 
162    struct breadcrumbs_context *ctx =
163       malloc(sizeof(struct breadcrumbs_context));
164    ctx->device = device;
165    ctx->breadcrumb_idx = 0;
166    ctx->thread_stop = false;
167 
168    if (sscanf(breadcrumbs_opt, "%[^:]:%d,break=%u:%u", ctx->remote_host,
169               &ctx->remote_port, &ctx->breadcrumb_breakpoint,
170               &ctx->breadcrumb_breakpoint_hits) != 4) {
171       free(ctx);
172       mesa_loge("Wrong TU_BREADCRUMBS value");
173       return;
174    }
175 
176    device->breadcrumbs_ctx = ctx;
177 
178    struct tu6_global *global = device->global_bo->map;
179    global->breadcrumb_cpu_sync_seqno = 0;
180    global->breadcrumb_gpu_sync_seqno = 0;
181 
182    pthread_create(&ctx->breadcrumbs_thread, NULL, sync_gpu_with_cpu, ctx);
183 }
184 
185 void
tu_breadcrumbs_finish(struct tu_device * device)186 tu_breadcrumbs_finish(struct tu_device *device)
187 {
188    struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
189    if (!ctx || ctx->thread_stop)
190       return;
191 
192    ctx->thread_stop = true;
193    pthread_join(ctx->breadcrumbs_thread, NULL);
194 
195    free(ctx);
196 }
197 
198 void
tu_cs_emit_sync_breadcrumb(struct tu_cs * cs,uint8_t opcode,uint16_t cnt)199 tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
200 {
201    /* TODO: we may run out of space if we add breadcrumbs
202     * to non-growable CS.
203     */
204    if (cs->mode != TU_CS_MODE_GROW)
205       return;
206 
207    struct tu_device *device = cs->device;
208    struct breadcrumbs_context *ctx = device->breadcrumbs_ctx;
209    if (!ctx || ctx->thread_stop)
210       return;
211 
212    bool before_packet = (cnt != 0);
213 
214    if (before_packet) {
215       switch (opcode) {
216       case CP_EXEC_CS_INDIRECT:
217       case CP_EXEC_CS:
218       case CP_DRAW_INDX:
219       case CP_DRAW_INDX_OFFSET:
220       case CP_DRAW_INDIRECT:
221       case CP_DRAW_INDX_INDIRECT:
222       case CP_DRAW_INDIRECT_MULTI:
223       case CP_DRAW_AUTO:
224       case CP_BLIT:
225          // case CP_SET_DRAW_STATE:
226          // case CP_LOAD_STATE6_FRAG:
227          // case CP_LOAD_STATE6_GEOM:
228          break;
229       default:
230          return;
231       };
232    } else {
233       assert(cs->breadcrumb_emit_after == 0);
234    }
235 
236    uint32_t current_breadcrumb = p_atomic_inc_return(&ctx->breadcrumb_idx);
237 
238    if (ctx->breadcrumb_breakpoint != -1 &&
239        current_breadcrumb < ctx->breadcrumb_breakpoint)
240       return;
241 
242    emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
243    emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
244    emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
245 
246    emit_pkt7(cs, CP_MEM_WRITE, 3);
247    tu_cs_emit_qw(
248       cs, device->global_bo->iova + gb_offset(breadcrumb_gpu_sync_seqno));
249    tu_cs_emit(cs, current_breadcrumb);
250 
251    /* Wait until CPU acknowledges the value written by GPU */
252    emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
253    tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
254                      CP_WAIT_REG_MEM_0_POLL_MEMORY);
255    tu_cs_emit_qw(
256       cs, device->global_bo->iova + gb_offset(breadcrumb_cpu_sync_seqno));
257    tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(current_breadcrumb));
258    tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0));
259    tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
260 
261    if (before_packet)
262       cs->breadcrumb_emit_after = cnt;
263 }
264