/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

#include <pthread.h>

/*
 * This defines the delay in ms after which the memory location designated for
 * comparison against the reference value is written to, unblocking the
 * command processor.
 */
#define WRITE_MEM_ADDRESS_DELAY_MS 100

#define PACKET_TYPE3 3

#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
			(((op) & 0xFF) << 8) | \
			((n) & 0x3FFF) << 16)

#define PACKET3_WAIT_REG_MEM 0x3C
#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
	/* 0 - always
	 * 1 - <
	 * 2 - <=
	 * 3 - ==
	 * 4 - !=
	 * 5 - >=
	 * 6 - >
	 */
#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
	/* 0 - reg
	 * 1 - mem
	 */
#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
	/* 0 - wait_reg_mem
	 * 1 - wr_wait_wr_reg
	 */
#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
	/* 0 - me
	 * 1 - pfp
	 */

#define PACKET3_WRITE_DATA 0x37
#define WRITE_DATA_DST_SEL(x) ((x) << 8)
	/* 0 - register
	 * 1 - memory (sync - via GRBM)
	 * 2 - gl2
	 * 3 - gds
	 * 4 - reserved
	 * 5 - memory (async - direct)
	 */
#define WR_ONE_ADDR (1 << 16)
#define WR_CONFIRM (1 << 20)
#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 */
#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
	/* 0 - me
	 * 1 - pfp
	 * 2 - ce
	 */

#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54f

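/* SDMA_OP_POLL_REGMEM is the SDMA counterpart of the CP WAIT_REG_MEM packet above */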
#define SDMA_PKT_HEADER_OP(x) ((x) & 0xff)
#define SDMA_OP_POLL_REGMEM 8

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;

static pthread_t stress_thread;
static uint32_t *ptr;

int use_uc_mtype = 0;

static void amdgpu_deadlock_helper(unsigned ip_type);
static void amdgpu_deadlock_gfx(void);
static void amdgpu_deadlock_compute(void);
static void amdgpu_illegal_reg_access(void);
static void amdgpu_illegal_mem_access(void);
static void amdgpu_deadlock_sdma(void);
static void amdgpu_dispatch_hang_gfx(void);
static void amdgpu_dispatch_hang_compute(void);
static void amdgpu_dispatch_hang_slow_gfx(void);
static void amdgpu_dispatch_hang_slow_compute(void);
static void amdgpu_draw_hang_gfx(void);
static void amdgpu_draw_hang_slow_gfx(void);

CU_BOOL suite_deadlock_tests_enable(void)
{
	CU_BOOL enable = CU_TRUE;
	uint32_t asic_id;

	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;

	/*
	 * Only enable for ASICs supporting GPU reset and for which it's enabled
	 * by default (currently GFX8/9 dGPUs).
	 */
	if (device_handle->info.family_id != AMDGPU_FAMILY_VI &&
	    device_handle->info.family_id != AMDGPU_FAMILY_AI &&
	    device_handle->info.family_id != AMDGPU_FAMILY_CI) {
		printf("\n\nGPU reset is not enabled for the ASIC, deadlock suite disabled\n");
		enable = CU_FALSE;
	}

	asic_id = device_handle->info.asic_id;
	if (asic_is_arcturus(asic_id)) {
		if (amdgpu_set_test_active("Deadlock Tests",
				"gfx ring block test (set amdgpu.lockup_timeout=50)",
				CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	if (device_handle->info.family_id >= AMDGPU_FAMILY_AI)
		use_uc_mtype = 1;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	return enable;
}

int suite_deadlock_tests_init(void)
{
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError:%s. "
			       "Hint:Try to run this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	return CUE_SUCCESS;
}

int suite_deadlock_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}


CU_TestInfo deadlock_tests[] = {
	{ "gfx ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_gfx },
	{ "compute ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_compute },
	{ "sdma ring block test (set amdgpu.lockup_timeout=50)", amdgpu_deadlock_sdma },
	{ "illegal reg access test", amdgpu_illegal_reg_access },
	{ "illegal mem access test (set amdgpu.vm_fault_stop=2)", amdgpu_illegal_mem_access },
	{ "gfx ring bad dispatch test (set amdgpu.lockup_timeout=50)", amdgpu_dispatch_hang_gfx },
	{ "compute ring bad dispatch test (set amdgpu.lockup_timeout=50,50)", amdgpu_dispatch_hang_compute },
	{ "gfx ring bad slow dispatch test (set amdgpu.lockup_timeout=50)", amdgpu_dispatch_hang_slow_gfx },
	{ "compute ring bad slow dispatch test (set amdgpu.lockup_timeout=50,50)", amdgpu_dispatch_hang_slow_compute },
	{ "gfx ring bad draw test (set amdgpu.lockup_timeout=50)", amdgpu_draw_hang_gfx },
	{ "gfx ring slow bad draw test (set amdgpu.lockup_timeout=50)", amdgpu_draw_hang_slow_gfx },
	CU_TEST_INFO_NULL,
};

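/*
 * Body of the background thread: after WRITE_MEM_ADDRESS_DELAY_MS it writes
 * the dword the blocked ring is polling on, releasing the WAIT_REG_MEM /
 * POLL_REGMEM condition.
 */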
static void *write_mem_address(void *data)
{
	int i;

	/* useconds_t range is [0, 1,000,000] so use loop for waits > 1s */
	for (i = 0; i < WRITE_MEM_ADDRESS_DELAY_MS; i++)
		usleep(1000);

	ptr[256] = 0x1;

	return 0;
}

static void amdgpu_deadlock_gfx(void)
{
	amdgpu_deadlock_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_deadlock_compute(void)
{
	amdgpu_deadlock_helper(AMDGPU_HW_IP_COMPUTE);
}

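/*
 * Block a ring: every submitted IB starts with a WAIT_REG_MEM packet that
 * polls ptr[256] until it becomes non-zero, which only happens once
 * write_mem_address() fires. With amdgpu.lockup_timeout=50 this is long
 * enough to trigger a GPU reset, so both the submissions and the final
 * fence wait are allowed to return -ECANCELED.
 */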
static void amdgpu_deadlock_helper(unsigned ip_type)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;

	r = pthread_create(&stress_thread, NULL, write_mem_address, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
			AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
			&ib_result_handle, &ib_result_cpu,
			&ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;

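	/* WAIT_REG_MEM packet: poll dword 256 of this BO until it is non-zero */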
	ptr[0] = PACKET3(PACKET3_WAIT_REG_MEM, 5);
	ptr[1] = (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
		  WAIT_REG_MEM_FUNCTION(4) | /* != */
		  WAIT_REG_MEM_ENGINE(0));   /* me */
	ptr[2] = (ib_result_mc_address + 256*4) & 0xfffffffc;
	ptr[3] = ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff;
	ptr[4] = 0x00000000; /* reference value */
	ptr[5] = 0xffffffff; /* and mask */
	ptr[6] = 0x00000004; /* poll interval */

	for (i = 7; i < 16; ++i)
		ptr[i] = 0xffff1000; /* type-3 NOP padding */

	ptr[256] = 0x0; /* the memory we wait on to change */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = 16;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = ip_type;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	for (i = 0; i < 200; i++) {
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);
	}

	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle;
	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);

	pthread_join(stress_thread, NULL);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

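/*
 * Same blocking scheme as amdgpu_deadlock_helper(), but repeated for every
 * available SDMA ring and using the SDMA POLL_REGMEM packet instead of the
 * CP WAIT_REG_MEM packet.
 */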
static void amdgpu_deadlock_sdma(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_DMA, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		r = pthread_create(&stress_thread, NULL, write_mem_address, NULL);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
				AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
				&ib_result_handle, &ib_result_cpu,
				&ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		i = 0;

		ptr[i++] = SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
				(0 << 26) | /* WAIT_REG_MEM */
				(4 << 28) | /* != */
				(1 << 31);  /* memory */
		ptr[i++] = (ib_result_mc_address + 256*4) & 0xfffffffc;
		ptr[i++] = ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff;
		ptr[i++] = 0x00000000; /* reference value */
		ptr[i++] = 0xffffffff; /* and mask */
		ptr[i++] = 4 |             /* poll interval */
				(0xfff << 16); /* retry count */

		for (; i < 16; i++)
			ptr[i] = 0;

		ptr[256] = 0x0; /* the memory we wait on to change */

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_DMA;
		ibs_request.ring = ring_id;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		for (i = 0; i < 200; i++) {
			r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
			CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);
		}

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_DMA;
		fence_status.ip_instance = 0;
		fence_status.ring = ring_id;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
		CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);

		pthread_join(stress_thread, NULL);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

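/*
 * Submit a single WRITE_DATA packet on the gfx ring that either writes a
 * privileged register (reg_access != 0) or an unmapped GPU address
 * (0xdeadbee0). The kernel is expected to catch the bad access, so the
 * submission or the fence wait may return -ECANCELED.
 */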
static void bad_access_helper(int reg_access)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t expired;
	int i, r;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
			AMDGPU_GEM_DOMAIN_GTT, 0, 0,
			&ib_result_handle, &ib_result_cpu,
			&ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ptr = ib_result_cpu;
	i = 0;

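	/* WRITE_DATA: DST_SEL(0) targets a register, DST_SEL(5) targets memory (async, direct) */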
	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
	ptr[i++] = (reg_access ? WRITE_DATA_DST_SEL(0) : WRITE_DATA_DST_SEL(5)) | WR_CONFIRM;
	ptr[i++] = reg_access ? mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR : 0xdeadbee0;
	ptr[i++] = 0;
	ptr[i++] = 0xdeadbeef;

	for (; i < 16; ++i)
		ptr[i] = 0xffff1000; /* type-3 NOP padding */

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = 16;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = 0;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);

	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL((r == 0 || r == -ECANCELED), 1);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_illegal_reg_access(void)
{
	bad_access_helper(1);
}

static void amdgpu_illegal_mem_access(void)
{
	bad_access_helper(0);
}

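/*
 * The remaining tests are thin wrappers around the dispatch/draw helpers
 * shared with the other test suites (declared in amdgpu_test.h); each helper
 * submits a deliberately bad or deliberately slow shader job to provoke a
 * hang on the given IP type.
 */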
static void amdgpu_dispatch_hang_gfx(void)
{
	amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_GFX);
}

static void amdgpu_dispatch_hang_compute(void)
{
	amdgpu_dispatch_hang_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_dispatch_hang_slow_gfx(void)
{
	amdgpu_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_GFX);
}

static void amdgpu_dispatch_hang_slow_compute(void)
{
	amdgpu_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
}

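/*
 * On each gfx ring: run a good draw copy, then a hanging one (third argument
 * selects the hang variant), then a good one again to check recovery.
 */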
static void amdgpu_draw_hang_gfx(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings)
		printf("SKIP ... as there's no graphics ring\n");

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 1);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
	}
}

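/*
 * Same pattern as above, but the middle job is a deliberately slow draw
 * rather than an invalid one.
 */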
static void amdgpu_draw_hang_slow_gfx(void)
{
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id;
	int r;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
		amdgpu_memcpy_draw_hang_slow_test(device_handle, ring_id);
		amdgpu_memcpy_draw_test(device_handle, ring_id, 0);
	}
}