1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include "xf86drm.h"
#include <limits.h>
#include <inttypes.h>
34
#define PATH_SIZE PATH_MAX

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/*
 * Human-readable names of the RAS-capable IP blocks. The order must stay
 * in sync with enum amdgpu_ras_block below: ras_block_str(i) indexes this
 * table with an enum value, and amdgpu_ras_find_block_id_by_name() maps a
 * name back to the matching enum value by position.
 */
const char *ras_block_string[] = {
	"umc",
	"sdma",
	"gfx",
	"mmhub",
	"athub",
	"pcie_bif",
	"hdp",
	"xgmi_wafl",
	"df",
	"smn",
	"sem",
	"mp0",
	"mp1",
	"fuse",
};

/* Map an amdgpu_ras_block enum value to its sysfs/debugfs name. */
#define ras_block_str(i) (ras_block_string[i])
57
/*
 * IP blocks the kernel RAS framework can report on. Values are bit
 * positions in the feature masks returned by
 * AMDGPU_INFO_RAS_ENABLED_FEATURES; order must match ras_block_string[].
 */
enum amdgpu_ras_block {
	AMDGPU_RAS_BLOCK__UMC = 0,
	AMDGPU_RAS_BLOCK__SDMA,
	AMDGPU_RAS_BLOCK__GFX,
	AMDGPU_RAS_BLOCK__MMHUB,
	AMDGPU_RAS_BLOCK__ATHUB,
	AMDGPU_RAS_BLOCK__PCIE_BIF,
	AMDGPU_RAS_BLOCK__HDP,
	AMDGPU_RAS_BLOCK__XGMI_WAFL,
	AMDGPU_RAS_BLOCK__DF,
	AMDGPU_RAS_BLOCK__SMN,
	AMDGPU_RAS_BLOCK__SEM,
	AMDGPU_RAS_BLOCK__MP0,
	AMDGPU_RAS_BLOCK__MP1,
	AMDGPU_RAS_BLOCK__FUSE,

	AMDGPU_RAS_BLOCK__LAST
};

#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
/* Bitmask with one bit set for every block above. */
#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
79
/*
 * Fine-grained GFX sub-block indices used as the sub_block_index of an
 * injection request. This mirrors the kernel's amdgpu_ras_gfx_subblock
 * enum, so values and ordering must not change. The *_INDEX_START/_END
 * aliases bound each hardware unit's range; SQC, TCC and EA are further
 * split into several numbered sub-ranges.
 */
enum amdgpu_ras_gfx_subblock {
	/* CPC */
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
		AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF */
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
		AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG */
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
		AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS */
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI */
	AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ */
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
	AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges) */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2 */
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA */
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
		AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA */
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
		AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4 */
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
	AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI */
	AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP */
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
		AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD */
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
		AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges) */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2 */
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
		AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank */
	AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker */
	AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache */
	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	AMDGPU_RAS_BLOCK__GFX_MAX
};
309
/*
 * Error severities accepted by the kernel's RAS enable/inject interface.
 * These are bitmask values mirroring the kernel's amdgpu_ras_error_type.
 */
enum amdgpu_ras_error_type {
	AMDGPU_RAS_ERROR__NONE = 0,
	AMDGPU_RAS_ERROR__PARITY = 1,
	AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2,
	AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
	AMDGPU_RAS_ERROR__POISON = 8,
};
317
/*
 * One entry of the per-IP inject test tables below: which block and
 * sub-block to inject into, with what error type, address and value.
 */
struct ras_inject_test_config {
	char name[64];		/* printable test name, e.g. "ras_gfx.2.0" */
	char block[32];		/* block name, looked up in ras_block_string */
	int sub_block;		/* amdgpu_ras_gfx_subblock value (GFX only) */
	enum amdgpu_ras_error_type type;
	uint64_t address;
	uint64_t value;
};

/*
 * The three structs below mirror the kernel's ras_debug_if binary layout
 * that is written raw to the ras_ctrl debugfs file by amdgpu_ras_invoke().
 * Their layout must match the running kernel — do not reorder or resize.
 */
struct ras_common_if {
	enum amdgpu_ras_block block;
	enum amdgpu_ras_error_type type;
	uint32_t sub_block_index;
	char name[32];
};

struct ras_inject_if {
	struct ras_common_if head;
	uint64_t address;
	uint64_t value;
};

struct ras_debug_if {
	union {
		struct ras_common_if head;
		struct ras_inject_if inject;
	};
	int op;			/* operation selector understood by ras_ctrl */
};
347 /* for now, only umc, gfx, sdma has implemented. */
348 #define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\
349 (1 << AMDGPU_RAS_BLOCK__GFX))
350 #define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\
351 (1 << AMDGPU_RAS_BLOCK__GFX))
352 #define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\
353 (1 << AMDGPU_RAS_BLOCK__SDMA) |\
354 (1 << AMDGPU_RAS_BLOCK__GFX))
355
356 static uint32_t ras_block_mask_inject = DEFAULT_RAS_BLOCK_MASK_INJECT;
357 static uint32_t ras_block_mask_query = DEFAULT_RAS_BLOCK_MASK_INJECT;
358 static uint32_t ras_block_mask_basic = DEFAULT_RAS_BLOCK_MASK_BASIC;
359
/* Bitmasks selecting which blocks each kind of test exercises. */
struct ras_test_mask {
	uint32_t inject_mask;
	uint32_t query_mask;
	uint32_t basic_mask;
};

/* Everything the suite needs to know about one RAS-capable device. */
struct amdgpu_ras_data {
	amdgpu_device_handle device_handle;
	uint32_t  id;		/* DRI instance index (sysfs/debugfs) */
	uint32_t  capability;	/* supported-blocks mask from the kernel */
	struct ras_test_mask test_mask;
};

/* all devices who has ras supported */
static struct amdgpu_ras_data devices[MAX_CARDS_SUPPORTED];
static int devices_count;

/* PCI device-id/revision keyed entry of the inject whitelist below. */
struct ras_DID_test_mask{
	uint16_t device_id;
	uint16_t revision_id;
	struct ras_test_mask test_mask;
};

/* white list for inject test. */
#define RAS_BLOCK_MASK_ALL {\
	DEFAULT_RAS_BLOCK_MASK_INJECT,\
	DEFAULT_RAS_BLOCK_MASK_QUERY,\
	DEFAULT_RAS_BLOCK_MASK_BASIC\
}

/* Query/basic only — the default for devices not in the whitelist,
 * so injection is never attempted on unlisted hardware. */
#define RAS_BLOCK_MASK_QUERY_BASIC {\
	0,\
	DEFAULT_RAS_BLOCK_MASK_QUERY,\
	DEFAULT_RAS_BLOCK_MASK_BASIC\
}
395
/* UMC injection cases: one single-correctable error at address/value 0. */
static const struct ras_inject_test_config umc_ras_inject_test[] = {
	{"ras_umc.1.0", "umc", 0, AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
};

/* GFX injection cases: one single-correctable error per sub-block. */
static const struct ras_inject_test_config gfx_ras_inject_test[] = {
	{"ras_gfx.2.0", "gfx", AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.1", "gfx", AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.2", "gfx", AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.3", "gfx", AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.4", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.5", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.6", "gfx", AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.7", "gfx", AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.8", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.9", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.10", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.11", "gfx", AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.12", "gfx", AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.13", "gfx", AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
	{"ras_gfx.2.14", "gfx", AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM,
		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, 0, 0},
};

/* Devices (PCI device-id + revision) allowed to run the inject tests. */
static const struct ras_DID_test_mask ras_DID_array[] = {
	{0x66a1, 0x00, RAS_BLOCK_MASK_ALL},
	{0x66a1, 0x01, RAS_BLOCK_MASK_ALL},
	{0x66a1, 0x04, RAS_BLOCK_MASK_ALL},
};
438
amdgpu_ras_find_block_id_by_name(const char * name)439 static uint32_t amdgpu_ras_find_block_id_by_name(const char *name)
440 {
441 int i;
442
443 for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
444 if (strcmp(name, ras_block_string[i]) == 0)
445 return i;
446 }
447
448 return ARRAY_SIZE(ras_block_string);
449 }
450
amdgpu_ras_get_error_type_id(enum amdgpu_ras_error_type type)451 static char *amdgpu_ras_get_error_type_id(enum amdgpu_ras_error_type type)
452 {
453 switch (type) {
454 case AMDGPU_RAS_ERROR__PARITY:
455 return "parity";
456 case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
457 return "single_correctable";
458 case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
459 return "multi_uncorrectable";
460 case AMDGPU_RAS_ERROR__POISON:
461 return "poison";
462 case AMDGPU_RAS_ERROR__NONE:
463 default:
464 return NULL;
465 }
466 }
467
/*
 * Look up the per-device test mask in the DID whitelist. Devices not in
 * the list get the query/basic-only default, so error injection is never
 * attempted on unlisted hardware.
 */
static struct ras_test_mask amdgpu_ras_get_test_mask(drmDevicePtr device)
{
	/* const: the default is never modified, only copied out */
	static const struct ras_test_mask default_test_mask =
		RAS_BLOCK_MASK_QUERY_BASIC;
	int i;

	/* use the file-wide ARRAY_SIZE helper instead of the previous
	 * open-coded sizeof division */
	for (i = 0; i < ARRAY_SIZE(ras_DID_array); i++) {
		if (ras_DID_array[i].device_id == device->deviceinfo.pci->device_id &&
		    ras_DID_array[i].revision_id == device->deviceinfo.pci->revision_id)
			return ras_DID_array[i].test_mask;
	}
	return default_test_mask;
}
480
/*
 * Query the kernel for the device's RAS feature masks and return the
 * supported-features half; 0 when the query fails (device has no RAS).
 */
static uint32_t amdgpu_ras_lookup_capability(amdgpu_device_handle device_handle)
{
	/* AMDGPU_INFO_RAS_ENABLED_FEATURES fills a 64-bit word whose low
	 * half is the enabled mask and high half the supported mask. */
	union {
		uint64_t feature_mask;
		struct {
			uint32_t enabled_features;
			uint32_t supported_features;
		};
	} features = { 0 };

	if (amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
			      sizeof(features), &features))
		return 0;

	return features.supported_features;
}
499
/* Forward declaration; defined with the other file helpers below. */
static int get_file_contents(char *file, char *buf, int size);
501
/*
 * Find the DRI instance index (/sys/kernel/debug/dri/<i>) whose PCI
 * bus location matches the given device. Returns the index, or -1 when
 * no debugfs entry matches.
 */
static int amdgpu_ras_lookup_id(drmDevicePtr device)
{
	char path[PATH_SIZE];
	char str[128];
	drmPciBusInfo info;
	int i;
	int ret;

	for (i = 0; i < MAX_CARDS_SUPPORTED; i++) {
		memset(str, 0, sizeof(str));
		memset(&info, 0, sizeof(info));
		snprintf(path, PATH_SIZE, "/sys/kernel/debug/dri/%d/name", i);
		if (get_file_contents(path, str, sizeof(str)) <= 0)
			continue;

		/* debugfs "name" starts with e.g. "amdgpu dev=0000:03:00.0" */
		ret = sscanf(str, "amdgpu dev=%04hx:%02hhx:%02hhx.%01hhx",
				&info.domain, &info.bus, &info.dev, &info.func);
		if (ret != 4)
			continue;

		/* NOTE(review): whole-struct memcmp assumes any padding in
		 * drmPciBusInfo is zeroed on both sides; info is memset above
		 * — confirm the same holds for device->businfo.pci. */
		if (memcmp(&info, device->businfo.pci, sizeof(info)) == 0)
			return i;
	}
	return -1;
}
527
//helpers

/* State of the card currently under test; (re)loaded by set_test_card(). */
static int test_card;
static char sysfs_path[PATH_SIZE];	/* /sys/class/drm/card<id>/device/ras/ */
static char debugfs_path[PATH_SIZE];	/* /sys/kernel/debug/dri/<id>/ras/ */
static uint32_t ras_mask;		/* supported-blocks capability mask */
static amdgpu_device_handle device_handle;
535
set_test_card(int card)536 static void set_test_card(int card)
537 {
538 test_card = card;
539 snprintf(sysfs_path, PATH_SIZE, "/sys/class/drm/card%d/device/ras/", devices[card].id);
540 snprintf(debugfs_path, PATH_SIZE, "/sys/kernel/debug/dri/%d/ras/", devices[card].id);
541 ras_mask = devices[card].capability;
542 device_handle = devices[card].device_handle;
543 ras_block_mask_inject = devices[card].test_mask.inject_mask;
544 ras_block_mask_query = devices[card].test_mask.query_mask;
545 ras_block_mask_basic = devices[card].test_mask.basic_mask;
546 }
547
/* Sysfs RAS directory of the card selected by set_test_card(). */
static const char *get_ras_sysfs_root(void)
{
	return sysfs_path;
}
552
/* Debugfs RAS directory of the card selected by set_test_card(). */
static const char *get_ras_debugfs_root(void)
{
	return debugfs_path;
}
557
/*
 * Write size bytes of buf to an existing file. Returns the number of
 * bytes written, or -1 when the file cannot be opened (or write fails).
 */
static int set_file_contents(char *file, char *buf, int size)
{
	int fd = open(file, O_WRONLY);
	int written;

	if (fd < 0)
		return -1;

	written = write(fd, buf, size);
	close(fd);
	return written;
}
568
/*
 * Read up to size bytes from a file into buf. Returns the byte count,
 * or -1 when the file cannot be opened (or read fails). Note: buf is
 * NOT NUL-terminated by this helper; callers zero it beforehand.
 */
static int get_file_contents(char *file, char *buf, int size)
{
	int fd = open(file, O_RDONLY);
	int count;

	if (fd < 0)
		return -1;

	count = read(fd, buf, size);
	close(fd);
	return count;
}
579
/*
 * Probe whether the file can be opened with the given flags.
 * Returns 0 on success, -1 otherwise.
 */
static int is_file_ok(char *file, int flags)
{
	int fd = open(file, flags);

	if (fd < 0)
		return -1;

	close(fd);
	return 0;
}
590
amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block)591 static int amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block)
592 {
593 uint32_t feature_mask;
594 int ret;
595
596 ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
597 sizeof(feature_mask), &feature_mask);
598 if (ret)
599 return -1;
600
601 return (1 << block) & feature_mask;
602 }
603
amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block)604 static int amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block)
605 {
606 return (1 << block) & ras_mask;
607 }
608
amdgpu_ras_invoke(struct ras_debug_if * data)609 static int amdgpu_ras_invoke(struct ras_debug_if *data)
610 {
611 char path[PATH_SIZE];
612 int ret;
613
614 snprintf(path, sizeof(path), "%s", get_ras_debugfs_root());
615 strncat(path, "ras_ctrl", sizeof(path) - strlen(path));
616
617 ret = set_file_contents(path, (char *)data, sizeof(*data))
618 - sizeof(*data);
619 return ret;
620 }
621
amdgpu_ras_query_err_count(enum amdgpu_ras_block block,unsigned long * ue,unsigned long * ce)622 static int amdgpu_ras_query_err_count(enum amdgpu_ras_block block,
623 unsigned long *ue, unsigned long *ce)
624 {
625 char buf[64];
626 char name[PATH_SIZE];
627
628 *ue = *ce = 0;
629
630 if (amdgpu_ras_is_feature_supported(block) <= 0)
631 return -1;
632
633 snprintf(name, sizeof(name), "%s", get_ras_sysfs_root());
634 strncat(name, ras_block_str(block), sizeof(name) - strlen(name));
635 strncat(name, "_err_count", sizeof(name) - strlen(name));
636
637 if (is_file_ok(name, O_RDONLY))
638 return 0;
639
640 if (get_file_contents(name, buf, sizeof(buf)) <= 0)
641 return -1;
642
643 if (sscanf(buf, "ue: %lu\nce: %lu", ue, ce) != 2)
644 return -1;
645
646 return 0;
647 }
648
/*
 * Ask the kernel to inject one RAS error via ras_ctrl (op 2 is the
 * inject operation of the kernel's debugfs interface). The block must
 * already have RAS enabled. Returns 0 on success, -1 otherwise.
 */
static int amdgpu_ras_inject(enum amdgpu_ras_block block,
		uint32_t sub_block, enum amdgpu_ras_error_type type,
		uint64_t address, uint64_t value)
{
	/* the designated initializer zeroes every other member, so
	 * head.name stays NUL-terminated after the bounded strncpy below */
	struct ras_debug_if data = { .op = 2, };
	struct ras_inject_if *inject = &data.inject;
	int ret;

	if (amdgpu_ras_is_feature_enabled(block) <= 0) {
		fprintf(stderr, "block id(%d) is not valid\n", block);
		return -1;
	}

	inject->head.block = block;
	inject->head.type = type;
	inject->head.sub_block_index = sub_block;
	strncpy(inject->head.name, ras_block_str(block), sizeof(inject->head.name)-1);
	inject->address = address;
	inject->value = value;

	ret = amdgpu_ras_invoke(&data);
	CU_ASSERT_EQUAL(ret, 0);
	if (ret)
		return -1;

	return 0;
}
676
677 //tests
amdgpu_ras_features_test(int enable)678 static void amdgpu_ras_features_test(int enable)
679 {
680 struct ras_debug_if data;
681 int ret;
682 int i;
683
684 data.op = enable;
685 for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
686 struct ras_common_if head = {
687 .block = i,
688 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
689 .sub_block_index = 0,
690 .name = "",
691 };
692
693 if (amdgpu_ras_is_feature_supported(i) <= 0)
694 continue;
695
696 data.head = head;
697
698 ret = amdgpu_ras_invoke(&data);
699 CU_ASSERT_EQUAL(ret, 0);
700
701 if (ret)
702 continue;
703
704 ret = enable ^ amdgpu_ras_is_feature_enabled(i);
705 CU_ASSERT_EQUAL(ret, 0);
706 }
707 }
708
amdgpu_ras_disable_test(void)709 static void amdgpu_ras_disable_test(void)
710 {
711 int i;
712 for (i = 0; i < devices_count; i++) {
713 set_test_card(i);
714 amdgpu_ras_features_test(0);
715 }
716 }
717
amdgpu_ras_enable_test(void)718 static void amdgpu_ras_enable_test(void)
719 {
720 int i;
721 for (i = 0; i < devices_count; i++) {
722 set_test_card(i);
723 amdgpu_ras_features_test(1);
724 }
725 }
726
__amdgpu_ras_ip_inject_test(const struct ras_inject_test_config * ip_test,uint32_t size)727 static void __amdgpu_ras_ip_inject_test(const struct ras_inject_test_config *ip_test,
728 uint32_t size)
729 {
730 int i, ret;
731 unsigned long old_ue, old_ce;
732 unsigned long ue, ce;
733 uint32_t block;
734 int timeout;
735 bool pass;
736
737 for (i = 0; i < size; i++) {
738 timeout = 3;
739 pass = false;
740
741 block = amdgpu_ras_find_block_id_by_name(ip_test[i].block);
742
743 /* Ensure one valid ip block */
744 if (block == ARRAY_SIZE(ras_block_string))
745 break;
746
747 /* Ensure RAS feature for the IP block is enabled by kernel */
748 if (amdgpu_ras_is_feature_supported(block) <= 0)
749 break;
750
751 ret = amdgpu_ras_query_err_count(block, &old_ue, &old_ce);
752 CU_ASSERT_EQUAL(ret, 0);
753 if (ret)
754 break;
755
756 ret = amdgpu_ras_inject(block,
757 ip_test[i].sub_block,
758 ip_test[i].type,
759 ip_test[i].address,
760 ip_test[i].value);
761 CU_ASSERT_EQUAL(ret, 0);
762 if (ret)
763 break;
764
765 while (timeout > 0) {
766 sleep(5);
767
768 ret = amdgpu_ras_query_err_count(block, &ue, &ce);
769 CU_ASSERT_EQUAL(ret, 0);
770 if (ret)
771 break;
772
773 if (old_ue != ue || old_ce != ce) {
774 pass = true;
775 sleep(20);
776 break;
777 }
778 timeout -= 1;
779 }
780 printf("\t Test %s@block %s, subblock %d, error_type %s, address %ld, value %ld: %s\n",
781 ip_test[i].name,
782 ip_test[i].block,
783 ip_test[i].sub_block,
784 amdgpu_ras_get_error_type_id(ip_test[i].type),
785 ip_test[i].address,
786 ip_test[i].value,
787 pass ? "Pass" : "Fail");
788 }
789 }
790
__amdgpu_ras_inject_test(void)791 static void __amdgpu_ras_inject_test(void)
792 {
793 printf("...\n");
794
795 /* run UMC ras inject test */
796 __amdgpu_ras_ip_inject_test(umc_ras_inject_test,
797 ARRAY_SIZE(umc_ras_inject_test));
798
799 /* run GFX ras inject test */
800 __amdgpu_ras_ip_inject_test(gfx_ras_inject_test,
801 ARRAY_SIZE(gfx_ras_inject_test));
802 }
803
amdgpu_ras_inject_test(void)804 static void amdgpu_ras_inject_test(void)
805 {
806 int i;
807 for (i = 0; i < devices_count; i++) {
808 set_test_card(i);
809 __amdgpu_ras_inject_test();
810 }
811 }
812
__amdgpu_ras_query_test(void)813 static void __amdgpu_ras_query_test(void)
814 {
815 unsigned long ue, ce;
816 int ret;
817 int i;
818
819 for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
820 if (amdgpu_ras_is_feature_supported(i) <= 0)
821 continue;
822
823 if (!((1 << i) & ras_block_mask_query))
824 continue;
825
826 ret = amdgpu_ras_query_err_count(i, &ue, &ce);
827 CU_ASSERT_EQUAL(ret, 0);
828 }
829 }
830
amdgpu_ras_query_test(void)831 static void amdgpu_ras_query_test(void)
832 {
833 int i;
834 for (i = 0; i < devices_count; i++) {
835 set_test_card(i);
836 __amdgpu_ras_query_test();
837 }
838 }
839
amdgpu_ras_basic_test(void)840 static void amdgpu_ras_basic_test(void)
841 {
842 int ret;
843 int i;
844 int j;
845 uint32_t features;
846 char path[PATH_SIZE];
847
848 ret = is_file_ok("/sys/module/amdgpu/parameters/ras_mask", O_RDONLY);
849 CU_ASSERT_EQUAL(ret, 0);
850
851 for (i = 0; i < devices_count; i++) {
852 set_test_card(i);
853
854 ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
855 sizeof(features), &features);
856 CU_ASSERT_EQUAL(ret, 0);
857
858 snprintf(path, sizeof(path), "%s", get_ras_debugfs_root());
859 strncat(path, "ras_ctrl", sizeof(path) - strlen(path));
860
861 ret = is_file_ok(path, O_WRONLY);
862 CU_ASSERT_EQUAL(ret, 0);
863
864 snprintf(path, sizeof(path), "%s", get_ras_sysfs_root());
865 strncat(path, "features", sizeof(path) - strlen(path));
866
867 ret = is_file_ok(path, O_RDONLY);
868 CU_ASSERT_EQUAL(ret, 0);
869
870 for (j = 0; j < AMDGPU_RAS_BLOCK__LAST; j++) {
871 ret = amdgpu_ras_is_feature_supported(j);
872 if (ret <= 0)
873 continue;
874
875 if (!((1 << j) & ras_block_mask_basic))
876 continue;
877
878 snprintf(path, sizeof(path), "%s", get_ras_sysfs_root());
879 strncat(path, ras_block_str(j), sizeof(path) - strlen(path));
880 strncat(path, "_err_count", sizeof(path) - strlen(path));
881
882 ret = is_file_ok(path, O_RDONLY);
883 CU_ASSERT_EQUAL(ret, 0);
884
885 snprintf(path, sizeof(path), "%s", get_ras_debugfs_root());
886 strncat(path, ras_block_str(j), sizeof(path) - strlen(path));
887 strncat(path, "_err_inject", sizeof(path) - strlen(path));
888
889 ret = is_file_ok(path, O_WRONLY);
890 CU_ASSERT_EQUAL(ret, 0);
891 }
892 }
893 }
894
/* CUnit registration table for the RAS suite. */
CU_TestInfo ras_tests[] = {
	{ "ras basic test",	amdgpu_ras_basic_test },
	{ "ras query test",	amdgpu_ras_query_test },
	{ "ras inject test",	amdgpu_ras_inject_test },
	{ "ras disable test",	amdgpu_ras_disable_test },
	{ "ras enable test",	amdgpu_ras_enable_test },
	CU_TEST_INFO_NULL,
};
903
suite_ras_tests_enable(void)904 CU_BOOL suite_ras_tests_enable(void)
905 {
906 amdgpu_device_handle device_handle;
907 uint32_t major_version;
908 uint32_t minor_version;
909 int i;
910 drmDevicePtr device;
911
912 for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
913 if (amdgpu_device_initialize(drm_amdgpu[i], &major_version,
914 &minor_version, &device_handle))
915 continue;
916
917 if (drmGetDevice2(drm_amdgpu[i],
918 DRM_DEVICE_GET_PCI_REVISION,
919 &device))
920 continue;
921
922 if (device->bustype == DRM_BUS_PCI &&
923 amdgpu_ras_lookup_capability(device_handle)) {
924 amdgpu_device_deinitialize(device_handle);
925 return CU_TRUE;
926 }
927
928 if (amdgpu_device_deinitialize(device_handle))
929 continue;
930 }
931
932 return CU_FALSE;
933 }
934
suite_ras_tests_init(void)935 int suite_ras_tests_init(void)
936 {
937 drmDevicePtr device;
938 amdgpu_device_handle device_handle;
939 uint32_t major_version;
940 uint32_t minor_version;
941 uint32_t capability;
942 struct ras_test_mask test_mask;
943 int id;
944 int i;
945 int r;
946
947 for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
948 r = amdgpu_device_initialize(drm_amdgpu[i], &major_version,
949 &minor_version, &device_handle);
950 if (r)
951 continue;
952
953 if (drmGetDevice2(drm_amdgpu[i],
954 DRM_DEVICE_GET_PCI_REVISION,
955 &device)) {
956 amdgpu_device_deinitialize(device_handle);
957 continue;
958 }
959
960 if (device->bustype != DRM_BUS_PCI) {
961 amdgpu_device_deinitialize(device_handle);
962 continue;
963 }
964
965 capability = amdgpu_ras_lookup_capability(device_handle);
966 if (capability == 0) {
967 amdgpu_device_deinitialize(device_handle);
968 continue;
969
970 }
971
972 id = amdgpu_ras_lookup_id(device);
973 if (id == -1) {
974 amdgpu_device_deinitialize(device_handle);
975 continue;
976 }
977
978 test_mask = amdgpu_ras_get_test_mask(device);
979
980 devices[devices_count++] = (struct amdgpu_ras_data) {
981 device_handle, id, capability, test_mask,
982 };
983 }
984
985 if (devices_count == 0)
986 return CUE_SINIT_FAILED;
987
988 return CUE_SUCCESS;
989 }
990
suite_ras_tests_clean(void)991 int suite_ras_tests_clean(void)
992 {
993 int r;
994 int i;
995 int ret = CUE_SUCCESS;
996
997 for (i = 0; i < devices_count; i++) {
998 r = amdgpu_device_deinitialize(devices[i].device_handle);
999 if (r)
1000 ret = CUE_SCLEAN_FAILED;
1001 }
1002 return ret;
1003 }
1004