1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #if HAVE_ALLOCA_H
28 # include <alloca.h>
29 #endif
30
31 #include "CUnit/Basic.h"
32
33 #include "amdgpu_test.h"
34 #include "amdgpu_drm.h"
35 #include "amdgpu_internal.h"
36
37 #include <pthread.h>
38
39
/*
 * Delay, in milliseconds, after which the memory location designated for
 * comparison against the reference value is written to, unblocking the
 * command processor.
 */
#define WRITE_MEM_ADDRESS_DELAY_MS 100
46
/* Packet type value placed in the two top bits of a type-3 packet header. */
#define PACKET_TYPE3	3

/*
 * Build a type-3 packet header dword:
 *   bits 31:30 - packet type
 *   bits 29:16 - count field `n` (14 bits)
 *   bits 15:8  - opcode `op` (8 bits)
 *
 * Every operand is widened to uint32_t before shifting: a plain `3 << 30`
 * does not fit in a signed int, which is undefined behaviour in C.
 */
#define PACKET3(op, n)	(((uint32_t)PACKET_TYPE3 << 30) |		\
			 (((uint32_t)(op) & 0xFF) << 8) |		\
			 (((uint32_t)(n) & 0x3FFF) << 16))
52
/*
 * Opcode of the WAIT_REG_MEM packet, followed by helpers that shift each
 * field of its control dword into position.
 */
#define PACKET3_WAIT_REG_MEM 0x3C
/* Compare function, starting at bit 0. */
#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
/* 0 - always
 * 1 - <
 * 2 - <=
 * 3 - ==
 * 4 - !=
 * 5 - >=
 * 6 - >
 */
/* What is polled, starting at bit 4. */
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
/* 0 - reg
 * 1 - mem
 */
/* Operation, starting at bit 6. */
#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
/* 0 - wait_reg_mem
 * 1 - wr_wait_wr_reg
 */
/* Engine executing the wait, starting at bit 8. */
#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
/* 0 - me
 * 1 - pfp
 */
75
/*
 * Opcode of the WRITE_DATA packet, followed by helpers that shift each
 * field of its control dword into position.
 */
#define PACKET3_WRITE_DATA 0x37
/* Destination select, starting at bit 8. */
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
/* 0 - register
 * 1 - memory (sync - via GRBM)
 * 2 - gl2
 * 3 - gds
 * 4 - reserved
 * 5 - memory (async - direct)
 */
#define WR_ONE_ADDR (1 << 16)
#define WR_CONFIRM (1 << 20)
/* Cache policy, starting at bit 25. */
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
/* 0 - LRU
 * 1 - Stream
 */
/*
 * Engine select, starting at bit 30. The operand is widened to uint32_t
 * because a value of 2 or more reaches bit 31, which overflows a signed int.
 */
#define WRITE_DATA_ENGINE_SEL(x) ((uint32_t)(x) << 30)
/* 0 - me
 * 1 - pfp
 * 2 - ce
 */
96
/* Register offset used as the target of the illegal register write test. */
#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54f

/*
 * Extract the 8-bit opcode field of an SDMA packet header. The argument is
 * parenthesised so that compound expressions such as `a | b` are masked as
 * a whole instead of only their last operand.
 */
#define SDMA_PKT_HEADER_OP(x) ((x) & 0xff)
/* SDMA opcode used by the SDMA ring block test to poll a memory location. */
#define SDMA_OP_POLL_REGMEM 8
101
/* Device handle shared by the suite hooks and every test in this file. */
static amdgpu_device_handle device_handle;
/* Passed to amdgpu_device_initialize() as version outputs. */
static uint32_t major_version;
static uint32_t minor_version;

/* Thread running write_mem_address() for the ring block tests. */
static pthread_t stress_thread;
/* CPU pointer to the IB buffer of the test currently running. */
static uint32_t *ptr;

/*
 * Non-zero when the ring block tests map their buffer with
 * AMDGPU_VM_MTYPE_UC; set by suite_deadlock_tests_enable() for
 * family_id >= AMDGPU_FAMILY_AI.
 */
int use_uc_mtype = 0;
110
/*
 * Forward declarations of the test entry points referenced by the
 * deadlock_tests table. All of them are declared with a full (void)
 * prototype so that calls with arguments are rejected by the compiler.
 */
static void amdgpu_deadlock_helper(unsigned ip_type);
static void amdgpu_deadlock_gfx(void);
static void amdgpu_deadlock_compute(void);
static void amdgpu_illegal_reg_access(void);
static void amdgpu_illegal_mem_access(void);
static void amdgpu_deadlock_sdma(void);
static void amdgpu_dispatch_hang_gfx(void);
static void amdgpu_dispatch_hang_compute(void);
static void amdgpu_dispatch_hang_slow_gfx(void);
static void amdgpu_dispatch_hang_slow_compute(void);
static void amdgpu_draw_hang_gfx(void);
static void amdgpu_draw_hang_slow_gfx(void);
123
suite_deadlock_tests_enable(void)124 CU_BOOL suite_deadlock_tests_enable(void)
125 {
126 CU_BOOL enable = CU_TRUE;
127
128 if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
129 &minor_version, &device_handle))
130 return CU_FALSE;
131
132 /*
133 * Only enable for ASICs supporting GPU reset and for which it's enabled
134 * by default (currently GFX8/9 dGPUS)
135 */
136 if (device_handle->info.family_id != AMDGPU_FAMILY_VI &&
137 device_handle->info.family_id != AMDGPU_FAMILY_AI &&
138 device_handle->info.family_id != AMDGPU_FAMILY_CI) {
139 printf("\n\nGPU reset is not enabled for the ASIC, deadlock suite disabled\n");
140 enable = CU_FALSE;
141 }
142
143 if (device_handle->info.family_id >= AMDGPU_FAMILY_AI)
144 use_uc_mtype = 1;
145
146 if (amdgpu_device_deinitialize(device_handle))
147 return CU_FALSE;
148
149 return enable;
150 }
151
suite_deadlock_tests_init(void)152 int suite_deadlock_tests_init(void)
153 {
154 int r;
155
156 r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
157 &minor_version, &device_handle);
158
159 if (r) {
160 if ((r == -EACCES) && (errno == EACCES))
161 printf("\n\nError:%s. "
162 "Hint:Try to run this test program as root.",
163 strerror(errno));
164 return CUE_SINIT_FAILED;
165 }
166
167 return CUE_SUCCESS;
168 }
169
suite_deadlock_tests_clean(void)170 int suite_deadlock_tests_clean(void)
171 {
172 int r = amdgpu_device_deinitialize(device_handle);
173
174 if (r == 0)
175 return CUE_SUCCESS;
176 else
177 return CUE_SCLEAN_FAILED;
178 }
179
180
/*
 * Tests making up the deadlock suite. Each entry pairs the displayed test
 * name with its entry point; the names also carry the amdgpu module
 * parameter settings suggested for running the test. The table is
 * terminated by CU_TEST_INFO_NULL.
 */
CU_TestInfo deadlock_tests[] = {
	{ "gfx ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_gfx },
	{ "compute ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_compute },
	{ "sdma ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_sdma },
	{ "illegal reg access test", amdgpu_illegal_reg_access },
	{ "illegal mem access test (set amdgpu.vm_fault_stop=2)", amdgpu_illegal_mem_access },
	{ "gfx ring bad dispatch test (set amdgpu.lockup_timeout=50)", amdgpu_dispatch_hang_gfx },
	{ "compute ring bad dispatch test (set amdgpu.lockup_timeout=50,50)", amdgpu_dispatch_hang_compute },
	{ "gfx ring bad slow dispatch test (set amdgpu.lockup_timeout=50)", amdgpu_dispatch_hang_slow_gfx },
	{ "compute ring bad slow dispatch test (set amdgpu.lockup_timeout=50,50)", amdgpu_dispatch_hang_slow_compute },
	{ "gfx ring bad draw test (set amdgpu.lockup_timeout=50)", amdgpu_draw_hang_gfx },
	{ "gfx ring slow bad draw test (set amdgpu.lockup_timeout=50)", amdgpu_draw_hang_slow_gfx },
	CU_TEST_INFO_NULL,
};
195
write_mem_address(void * data)196 static void *write_mem_address(void *data)
197 {
198 int i;
199
200 /* useconds_t range is [0, 1,000,000] so use loop for waits > 1s */
201 for (i = 0; i < WRITE_MEM_ADDRESS_DELAY_MS; i++)
202 usleep(1000);
203
204 ptr[256] = 0x1;
205
206 return 0;
207 }
208
amdgpu_deadlock_gfx(void)209 static void amdgpu_deadlock_gfx(void)
210 {
211 amdgpu_deadlock_helper(AMDGPU_HW_IP_GFX);
212 }
213
amdgpu_deadlock_compute(void)214 static void amdgpu_deadlock_compute(void)
215 {
216 amdgpu_deadlock_helper(AMDGPU_HW_IP_COMPUTE);
217 }
218
amdgpu_deadlock_helper(unsigned ip_type)219 static void amdgpu_deadlock_helper(unsigned ip_type)
220 {
221 amdgpu_context_handle context_handle;
222 amdgpu_bo_handle ib_result_handle;
223 void *ib_result_cpu;
224 uint64_t ib_result_mc_address;
225 struct amdgpu_cs_request ibs_request;
226 struct amdgpu_cs_ib_info ib_info;
227 struct amdgpu_cs_fence fence_status;
228 uint32_t expired;
229 int i, r;
230 amdgpu_bo_list_handle bo_list;
231 amdgpu_va_handle va_handle;
232
233 r = pthread_create(&stress_thread, NULL, write_mem_address, NULL);
234 CU_ASSERT_EQUAL(r, 0);
235
236 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
237 CU_ASSERT_EQUAL(r, 0);
238
239 r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
240 AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
241 &ib_result_handle, &ib_result_cpu,
242 &ib_result_mc_address, &va_handle);
243 CU_ASSERT_EQUAL(r, 0);
244
245 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
246 &bo_list);
247 CU_ASSERT_EQUAL(r, 0);
248
249 ptr = ib_result_cpu;
250
251 ptr[0] = PACKET3(PACKET3_WAIT_REG_MEM, 5);
252 ptr[1] = (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
253 WAIT_REG_MEM_FUNCTION(4) | /* != */
254 WAIT_REG_MEM_ENGINE(0)); /* me */
255 ptr[2] = (ib_result_mc_address + 256*4) & 0xfffffffc;
256 ptr[3] = ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff;
257 ptr[4] = 0x00000000; /* reference value */
258 ptr[5] = 0xffffffff; /* and mask */
259 ptr[6] = 0x00000004; /* poll interval */
260
261 for (i = 7; i < 16; ++i)
262 ptr[i] = 0xffff1000;
263
264
265 ptr[256] = 0x0; /* the memory we wait on to change */
266
267
268
269 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
270 ib_info.ib_mc_address = ib_result_mc_address;
271 ib_info.size = 16;
272
273 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
274 ibs_request.ip_type = ip_type;
275 ibs_request.ring = 0;
276 ibs_request.number_of_ibs = 1;
277 ibs_request.ibs = &ib_info;
278 ibs_request.resources = bo_list;
279 ibs_request.fence_info.handle = NULL;
280 for (i = 0; i < 200; i++) {
281 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
282 CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);
283
284 }
285
286 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
287 fence_status.context = context_handle;
288 fence_status.ip_type = ip_type;
289 fence_status.ip_instance = 0;
290 fence_status.ring = 0;
291 fence_status.fence = ibs_request.seq_no;
292
293 r = amdgpu_cs_query_fence_status(&fence_status,
294 AMDGPU_TIMEOUT_INFINITE,0, &expired);
295 CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);
296
297 pthread_join(stress_thread, NULL);
298
299 r = amdgpu_bo_list_destroy(bo_list);
300 CU_ASSERT_EQUAL(r, 0);
301
302 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
303 ib_result_mc_address, 4096);
304 CU_ASSERT_EQUAL(r, 0);
305
306 r = amdgpu_cs_ctx_free(context_handle);
307 CU_ASSERT_EQUAL(r, 0);
308 }
309
amdgpu_deadlock_sdma(void)310 static void amdgpu_deadlock_sdma(void)
311 {
312 amdgpu_context_handle context_handle;
313 amdgpu_bo_handle ib_result_handle;
314 void *ib_result_cpu;
315 uint64_t ib_result_mc_address;
316 struct amdgpu_cs_request ibs_request;
317 struct amdgpu_cs_ib_info ib_info;
318 struct amdgpu_cs_fence fence_status;
319 uint32_t expired;
320 int i, r;
321 amdgpu_bo_list_handle bo_list;
322 amdgpu_va_handle va_handle;
323 struct drm_amdgpu_info_hw_ip info;
324 uint32_t ring_id;
325
326 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_DMA, 0, &info);
327 CU_ASSERT_EQUAL(r, 0);
328
329 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
330 CU_ASSERT_EQUAL(r, 0);
331
332 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
333 r = pthread_create(&stress_thread, NULL, write_mem_address, NULL);
334 CU_ASSERT_EQUAL(r, 0);
335
336 r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
337 AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
338 &ib_result_handle, &ib_result_cpu,
339 &ib_result_mc_address, &va_handle);
340 CU_ASSERT_EQUAL(r, 0);
341
342 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
343 &bo_list);
344 CU_ASSERT_EQUAL(r, 0);
345
346 ptr = ib_result_cpu;
347 i = 0;
348
349 ptr[i++] = SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
350 (0 << 26) | /* WAIT_REG_MEM */
351 (4 << 28) | /* != */
352 (1 << 31); /* memory */
353 ptr[i++] = (ib_result_mc_address + 256*4) & 0xfffffffc;
354 ptr[i++] = ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff;
355 ptr[i++] = 0x00000000; /* reference value */
356 ptr[i++] = 0xffffffff; /* and mask */
357 ptr[i++] = 4 | /* poll interval */
358 (0xfff << 16); /* retry count */
359
360 for (; i < 16; i++)
361 ptr[i] = 0;
362
363 ptr[256] = 0x0; /* the memory we wait on to change */
364
365 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
366 ib_info.ib_mc_address = ib_result_mc_address;
367 ib_info.size = 16;
368
369 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
370 ibs_request.ip_type = AMDGPU_HW_IP_DMA;
371 ibs_request.ring = ring_id;
372 ibs_request.number_of_ibs = 1;
373 ibs_request.ibs = &ib_info;
374 ibs_request.resources = bo_list;
375 ibs_request.fence_info.handle = NULL;
376
377 for (i = 0; i < 200; i++) {
378 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
379 CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);
380
381 }
382
383 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
384 fence_status.context = context_handle;
385 fence_status.ip_type = AMDGPU_HW_IP_DMA;
386 fence_status.ip_instance = 0;
387 fence_status.ring = ring_id;
388 fence_status.fence = ibs_request.seq_no;
389
390 r = amdgpu_cs_query_fence_status(&fence_status,
391 AMDGPU_TIMEOUT_INFINITE,0, &expired);
392 CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);
393
394 pthread_join(stress_thread, NULL);
395
396 r = amdgpu_bo_list_destroy(bo_list);
397 CU_ASSERT_EQUAL(r, 0);
398
399 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
400 ib_result_mc_address, 4096);
401 CU_ASSERT_EQUAL(r, 0);
402 }
403 r = amdgpu_cs_ctx_free(context_handle);
404 CU_ASSERT_EQUAL(r, 0);
405 }
406
bad_access_helper(int reg_access)407 static void bad_access_helper(int reg_access)
408 {
409 amdgpu_context_handle context_handle;
410 amdgpu_bo_handle ib_result_handle;
411 void *ib_result_cpu;
412 uint64_t ib_result_mc_address;
413 struct amdgpu_cs_request ibs_request;
414 struct amdgpu_cs_ib_info ib_info;
415 struct amdgpu_cs_fence fence_status;
416 uint32_t expired;
417 int i, r;
418 amdgpu_bo_list_handle bo_list;
419 amdgpu_va_handle va_handle;
420
421 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
422 CU_ASSERT_EQUAL(r, 0);
423
424 r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
425 AMDGPU_GEM_DOMAIN_GTT, 0, 0,
426 &ib_result_handle, &ib_result_cpu,
427 &ib_result_mc_address, &va_handle);
428 CU_ASSERT_EQUAL(r, 0);
429
430 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
431 &bo_list);
432 CU_ASSERT_EQUAL(r, 0);
433
434 ptr = ib_result_cpu;
435 i = 0;
436
437 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
438 ptr[i++] = (reg_access ? WRITE_DATA_DST_SEL(0) : WRITE_DATA_DST_SEL(5))| WR_CONFIRM;
439 ptr[i++] = reg_access ? mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR : 0xdeadbee0;
440 ptr[i++] = 0;
441 ptr[i++] = 0xdeadbeef;
442
443 for (; i < 16; ++i)
444 ptr[i] = 0xffff1000;
445
446 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
447 ib_info.ib_mc_address = ib_result_mc_address;
448 ib_info.size = 16;
449
450 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
451 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
452 ibs_request.ring = 0;
453 ibs_request.number_of_ibs = 1;
454 ibs_request.ibs = &ib_info;
455 ibs_request.resources = bo_list;
456 ibs_request.fence_info.handle = NULL;
457
458 r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
459 CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);
460
461
462 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
463 fence_status.context = context_handle;
464 fence_status.ip_type = AMDGPU_HW_IP_GFX;
465 fence_status.ip_instance = 0;
466 fence_status.ring = 0;
467 fence_status.fence = ibs_request.seq_no;
468
469 r = amdgpu_cs_query_fence_status(&fence_status,
470 AMDGPU_TIMEOUT_INFINITE,0, &expired);
471 CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);
472
473 r = amdgpu_bo_list_destroy(bo_list);
474 CU_ASSERT_EQUAL(r, 0);
475
476 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
477 ib_result_mc_address, 4096);
478 CU_ASSERT_EQUAL(r, 0);
479
480 r = amdgpu_cs_ctx_free(context_handle);
481 CU_ASSERT_EQUAL(r, 0);
482 }
483
/*
 * Illegal register access test: runs bad_access_helper() in register mode.
 * Defined with an explicit (void) parameter list, like the other test entry
 * points, so that it is a proper prototype.
 */
static void amdgpu_illegal_reg_access(void)
{
	bad_access_helper(1);
}
488
/*
 * Illegal memory access test: runs bad_access_helper() in memory mode.
 * Defined with an explicit (void) parameter list, like the other test entry
 * points, so that it is a proper prototype.
 */
static void amdgpu_illegal_mem_access(void)
{
	bad_access_helper(0);
}
493
amdgpu_dispatch_hang_gfx(void)494 static void amdgpu_dispatch_hang_gfx(void)
495 {
496 amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_GFX);
497 }
498
amdgpu_dispatch_hang_compute(void)499 static void amdgpu_dispatch_hang_compute(void)
500 {
501 amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
502 }
503
amdgpu_dispatch_hang_slow_gfx(void)504 static void amdgpu_dispatch_hang_slow_gfx(void)
505 {
506 amdgpu_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_GFX);
507 }
508
amdgpu_dispatch_hang_slow_compute(void)509 static void amdgpu_dispatch_hang_slow_compute(void)
510 {
511 amdgpu_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
512 }
513
amdgpu_draw_hang_gfx(void)514 static void amdgpu_draw_hang_gfx(void)
515 {
516 int r;
517 struct drm_amdgpu_info_hw_ip info;
518 uint32_t ring_id;
519
520 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
521 CU_ASSERT_EQUAL(r, 0);
522 if (!info.available_rings)
523 printf("SKIP ... as there's no graphic ring\n");
524
525 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
526 amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
527 amdgpu_memcpy_draw_test(device_handle, ring_id, 1);
528 amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
529 }
530 }
531
amdgpu_draw_hang_slow_gfx(void)532 static void amdgpu_draw_hang_slow_gfx(void)
533 {
534 struct drm_amdgpu_info_hw_ip info;
535 uint32_t ring_id;
536 int r;
537
538 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
539 CU_ASSERT_EQUAL(r, 0);
540
541 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
542 amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
543 amdgpu_memcpy_draw_hang_slow_test(device_handle, ring_id);
544 amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
545 }
546 }
547