1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Ben Widawsky <ben@bwidawsk.net>
25 *
26 */
27
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <assert.h>
31 #include <fcntl.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <getopt.h>
37 #include "intel_chipset.h"
38 #include "intel_io.h"
39 #include "igt_sysfs.h"
40 #include "drmtest.h"
41 #include "config.h"
42 #include <libudev.h>
43 #include <syslog.h>
44 #include "intel_l3_parity.h"
45
46 static unsigned int devid;
47 /* L3 size is always a function of banks. The number of banks cannot be
48 * determined by number of slices however */
num_banks(void)49 static inline int num_banks(void) {
50 switch (intel_gt(devid)) {
51 case 2: return 8;
52 case 1: return 4;
53 default: return 2;
54 }
55 }
/* Subbanks per bank; also the innermost dimension of the l3logs array. */
#define NUM_SUBBANKS 8
/* 128 KiB, i.e. MAX_ROW rows * 4 bytes * NUM_SUBBANKS. */
#define BYTES_PER_BANK (128 << 10)
/* Each row addresses [up to] 4 bytes. This multiplied by the number of
 * subbanks will give the L3 size per bank.
 * TODO: Row size is fixed on IVB, and variable on HSW.
 * NOTE(review): MAX_ROW allows 12-bit row numbers, while the row0/row1
 * fields of struct l3_log_register are only 11 bits wide - confirm which
 * limit is intended. */
#define MAX_ROW (1<<12)
/* Upper bound on banks in one slice; sizes the middle dimension of l3logs. */
#define MAX_BANKS_PER_SLICE 4
/* Log entries per slice: one for every (bank, subbank) pair. */
#define NUM_REGS (MAX_BANKS_PER_SLICE * NUM_SUBBANKS)
/* Slice count of the running device; evaluated at run time from devid. */
#define MAX_SLICES (intel_gt(devid) > 1 ? 2 : 1)
/* Compile-time upper bound on slices, used to size per-slice arrays. */
#define REAL_MAX_SLICES 2
/* Total L3 size in bytes for the running device.
 * TODO support SLM config */
#define L3_SIZE ((MAX_ROW * 4) * NUM_SUBBANKS * num_banks())
68
/*
 * One 32-bit L3 log entry. Each entry can record up to two rows (row0 and
 * row1), each with its own enable flag; the field widths add up to exactly
 * 32 bits (1 + 4 + 11 + 1 + 4 + 11).
 *
 * l3logs is the in-memory copy of every entry, indexed as
 * [slice][bank][subbank]. main() fills each slice from its sysfs file and
 * writes it back after modification.
 *
 * NOTE(review): the bit positions of the fields depend on the compiler's
 * bitfield allocation order - confirm against the format the kernel exposes.
 */
struct __attribute__ ((__packed__)) l3_log_register {
	uint32_t row0_enable : 1;	/* set when row0 holds a disabled row */
	uint32_t rsvd2 : 4;		/* reserved */
	uint32_t row0 : 11;		/* first recorded row number */
	uint32_t row1_enable : 1;	/* set when row1 holds a disabled row */
	uint32_t rsvd1 : 4;		/* reserved */
	uint32_t row1 : 11;		/* second recorded row number */
} l3logs[REAL_MAX_SLICES][MAX_BANKS_PER_SLICE][NUM_SUBBANKS];
77
/* Slice selected on the command line (-w); -1 means "all slices". */
static int which_slice = -1;
/*
 * Iterate __i over the selected slice(s): every slice in [0, MAX_SLICES)
 * when which_slice is -1, otherwise only which_slice itself.
 * which_slice and MAX_SLICES are re-evaluated on every iteration, and __i
 * is expanded several times, so pass a plain variable.
 */
#define for_each_slice(__i) \
	for ((__i) = (which_slice == -1) ? 0 : which_slice; \
	     (__i) < ((which_slice == -1) ? MAX_SLICES : (which_slice + 1)); \
	     (__i)++)
83
decode_dft(uint32_t dft)84 static void decode_dft(uint32_t dft)
85 {
86 if (IS_IVYBRIDGE(devid) || !(dft & 1)) {
87 printf("Error injection disabled\n");
88 return;
89 }
90 printf("Error injection enabled\n");
91 printf(" Hang = %s\n", (dft >> 28) & 0x1 ? "yes" : "no");
92 printf(" Row = %d\n", (dft >> 7) & 0x7ff);
93 printf(" Bank = %d\n", (dft >> 2) & 0x3);
94 printf(" Subbank = %d\n", (dft >> 4) & 0x7);
95 printf(" Slice = %d\n", (dft >> 1) & 0x1);
96 }
97
dumpit(int slice)98 static void dumpit(int slice)
99 {
100 int i, j;
101
102 for (i = 0; i < MAX_BANKS_PER_SLICE; i++) {
103 for (j = 0; j < NUM_SUBBANKS; j++) {
104 struct l3_log_register *reg = &l3logs[slice][i][j];
105
106 if (reg->row0_enable)
107 printf("Slice %d, Row %d, Bank %d, Subbank %d is disabled\n",
108 slice, reg->row0, i, j);
109 if (reg->row1_enable)
110 printf("Slice %d, Row %d, Bank %d, Subbank %d is disabled\n",
111 slice, reg->row1, i, j);
112 }
113 }
114 }
115
disable_rbs(int row,int bank,int sbank,int slice)116 static int disable_rbs(int row, int bank, int sbank, int slice)
117 {
118 struct l3_log_register *reg = &l3logs[slice][bank][sbank];
119
120 // can't map more than 2 rows
121 if (reg->row0_enable && reg->row1_enable)
122 return -1;
123
124 // can't remap the same row twice
125 if ((reg->row0_enable && reg->row0 == row) ||
126 (reg->row1_enable && reg->row1 == row)) {
127 return -1;
128 }
129
130 if (reg->row0_enable) {
131 reg->row1 = row;
132 reg->row1_enable = 1;
133 } else {
134 reg->row0 = row;
135 reg->row0_enable = 1;
136 }
137
138 return 0;
139 }
140
enables_rbs(int row,int bank,int sbank,int slice)141 static void enables_rbs(int row, int bank, int sbank, int slice)
142 {
143 struct l3_log_register *reg = &l3logs[slice][bank][sbank];
144
145 if (!reg->row0_enable && !reg->row1_enable)
146 return;
147
148 if (reg->row1_enable && reg->row1 == row)
149 reg->row1_enable = 0;
150 else if (reg->row0_enable && reg->row0 == row)
151 reg->row0_enable = 0;
152 }
153
/*
 * Print the command-line help text to stdout.
 *
 * @name: program name to show in the "usage:" line (normally argv[0]).
 */
static void usage(const char *name)
{
	printf("usage: %s [OPTIONS] [ACTION]\n"
	       "Operate on the i915 L3 GPU cache (should be run as root)\n\n"
	       " OPTIONS:\n"
	       " -r, --row=[row] The row to act upon (default 0)\n"
	       " -b, --bank=[bank] The bank to act upon (default 0)\n"
	       " -s, --subbank=[subbank] The subbank to act upon (default 0)\n"
	       " -w, --slice=[slice] Which slice to act on (default: -1 [all])\n"
	       " , --daemon Run the listener (-L) as a daemon\n"
	       " ACTIONS (only 1 may be specified at a time):\n"
	       " -h, --help Display this help\n"
	       " -H, --hw-info Display the current L3 properties\n"
	       " -l, --list List the current L3 logs\n"
	       " -a, --clear-all Clear all disabled rows\n"
	       " -e, --enable Enable row, bank, subbank (undo -d)\n"
	       " -d, --disable=<row,bank,subbank> Disable row, bank, subbank (inline arguments are deprecated. Please use -r, -b, -s instead)\n"
	       " -i, --inject [HSW only] Cause hardware to inject a row error\n"
	       " -u, --uninject [HSW only] Turn off hardware error injection (undo -i)\n"
	       " -L, --listen Listen for uevent errors\n",
	       name);
}
176
main(int argc,char * argv[])177 int main(int argc, char *argv[])
178 {
179 const char *path[REAL_MAX_SLICES] = {"l3_parity", "l3_parity_slice_1"};
180 int row = 0, bank = 0, sbank = 0;
181 int fd[REAL_MAX_SLICES] = {0}, ret, i;
182 int action = '0';
183 int daemonize = 0;
184 int device, dir;
185 uint32_t dft;
186
187 device = drm_open_driver(DRIVER_INTEL);
188 devid = intel_get_drm_devid(device);
189 if (intel_gen(devid) < 7 || IS_VALLEYVIEW(devid))
190 exit(77);
191
192 assert(intel_register_access_init(intel_get_pci_device(), 0, device) == 0);
193
194 dir = igt_sysfs_open(device);
195
196 for_each_slice(i) {
197 fd[i] = openat(dir, path[i], O_RDWR);
198 if (fd[i] < 0) {
199 if (i == 0) /* at least one slice must be supported */
200 exit(77);
201 continue;
202 }
203
204 if (read(fd[i], l3logs[i], NUM_REGS * sizeof(uint32_t)) < 0) {
205 perror(path[i]);
206 exit(77);
207 }
208 assert(lseek(fd[i], 0, SEEK_SET) == 0);
209 }
210
211 close(dir);
212
213 /* NB: It is potentially unsafe to read this register if the kernel is
214 * actively using this register range, or we're running multiple
215 * instances of this tool. Since neither of those cases should occur
216 * (and the tool should be root only) we can safely ignore this for
217 * now. Just be aware of this if for some reason a hang is reported
218 * when using this tool.
219 */
220 dft = intel_register_read(0xb038);
221
222 while (1) {
223 int c, option_index = 0;
224 struct option long_options[] = {
225 { "help", no_argument, 0, 'h' },
226 { "list", no_argument, 0, 'l' },
227 { "clear-all", no_argument, 0, 'a' },
228 { "enable", no_argument, 0, 'e' },
229 { "disable", optional_argument, 0, 'd' },
230 { "inject", no_argument, 0, 'i' },
231 { "uninject", no_argument, 0, 'u' },
232 { "hw-info", no_argument, 0, 'H' },
233 { "listen", no_argument, 0, 'L' },
234 { "row", required_argument, 0, 'r' },
235 { "bank", required_argument, 0, 'b' },
236 { "subbank", required_argument, 0, 's' },
237 { "slice", required_argument, 0, 'w' },
238 { "daemon", no_argument, &daemonize, 1 },
239 {0, 0, 0, 0}
240 };
241
242 c = getopt_long(argc, argv, "hHr:b:s:w:aled::iuL", long_options,
243 &option_index);
244 if (c == -1)
245 break;
246
247 if (c == 0)
248 continue;
249
250 switch (c) {
251 case '?':
252 case 'h':
253 usage(argv[0]);
254 exit(EXIT_SUCCESS);
255 case 'H':
256 printf("Number of slices: %d\n", MAX_SLICES);
257 printf("Number of banks: %d\n", num_banks());
258 printf("Subbanks per bank: %d\n", NUM_SUBBANKS);
259 printf("Max L3 size: %dK\n", L3_SIZE >> 10);
260 printf("Has error injection: %s\n", IS_HASWELL(devid) ? "yes" : "no");
261 exit(EXIT_SUCCESS);
262 case 'r':
263 row = atoi(optarg);
264 if (row >= MAX_ROW)
265 exit(EXIT_FAILURE);
266 break;
267 case 'b':
268 bank = atoi(optarg);
269 if (bank >= num_banks() || bank >= MAX_BANKS_PER_SLICE)
270 exit(EXIT_FAILURE);
271 break;
272 case 's':
273 sbank = atoi(optarg);
274 if (sbank >= NUM_SUBBANKS)
275 exit(EXIT_FAILURE);
276 break;
277 case 'w':
278 which_slice = atoi(optarg);
279 if (which_slice >= MAX_SLICES)
280 exit(EXIT_FAILURE);
281 break;
282 case 'i':
283 case 'u':
284 if (!IS_HASWELL(devid)) {
285 fprintf(stderr, "Error injection supported on HSW+ only\n");
286 exit(EXIT_FAILURE);
287 }
288 case 'd':
289 if (optarg) {
290 ret = sscanf(optarg, "%d,%d,%d", &row, &bank, &sbank);
291 if (ret != 3)
292 exit(EXIT_FAILURE);
293 }
294 case 'a':
295 case 'l':
296 case 'e':
297 case 'L':
298 if (action != '0') {
299 fprintf(stderr, "Only one action may be specified\n");
300 exit(EXIT_FAILURE);
301 }
302 action = c;
303 break;
304 default:
305 abort();
306 }
307 }
308
309 if (action == 'i') {
310 if (((dft >> 1) & 1) != which_slice) {
311 fprintf(stderr, "DFT register already has slice %d enabled, and we don't support multiple slices. Try modifying -w; but sometimes the register sticks in the wrong way\n", (dft >> 1) & 1);
312 exit(EXIT_FAILURE);
313 }
314
315 if (which_slice == -1) {
316 fprintf(stderr, "Cannot inject errors to multiple slices (modify -w)\n");
317 exit(EXIT_FAILURE);
318 }
319 if (dft & 1 && ((dft >> 1) && 1) == which_slice)
320 printf("warning: overwriting existing injections. This is very dangerous.\n");
321 }
322
323 /* Daemon doesn't work like the other commands */
324 if (action == 'L') {
325 struct l3_parity par;
326 struct l3_location loc;
327 if (daemonize) {
328 assert(daemon(0, 0) == 0);
329 openlog(argv[0], LOG_CONS | LOG_PID, LOG_USER);
330 }
331 memset(&par, 0, sizeof(par));
332 assert(l3_uevent_setup(&par) == 0);
333 assert(l3_listen(&par, daemonize == 1, &loc) == 0);
334 exit(EXIT_SUCCESS);
335 }
336
337 if (action == 'l')
338 decode_dft(dft);
339
340 /* Per slice operations */
341 for_each_slice(i) {
342 if (fd[i] < 0)
343 continue;
344
345 switch (action) {
346 case 'l':
347 dumpit(i);
348 break;
349 case 'a':
350 memset(l3logs[i], 0, NUM_REGS * sizeof(struct l3_log_register));
351 break;
352 case 'e':
353 enables_rbs(row, bank, sbank, i);
354 break;
355 case 'd':
356 assert(disable_rbs(row, bank, sbank, i) == 0);
357 break;
358 case 'i':
359 if (bank == 3) {
360 fprintf(stderr, "The hardware does not support error inject on bank 3.\n");
361 exit(EXIT_FAILURE);
362 }
363 dft |= row << 7;
364 dft |= sbank << 4;
365 dft |= bank << 2;
366 assert(i < 2);
367 dft |= i << 1; /* slice */
368 dft |= 1 << 0; /* enable */
369 intel_register_write(0xb038, dft);
370 break;
371 case 'u':
372 intel_register_write(0xb038, dft & ~(1<<0));
373 break;
374 case 'L':
375 break;
376 default:
377 abort();
378 }
379 }
380
381 intel_register_access_fini();
382 if (action == 'l')
383 exit(EXIT_SUCCESS);
384
385 for_each_slice(i) {
386 if (fd[i] < 0)
387 continue;
388
389 ret = write(fd[i], l3logs[i], NUM_REGS * sizeof(uint32_t));
390 if (ret == -1) {
391 perror("Writing sysfs");
392 exit(EXIT_FAILURE);
393 }
394 close(fd[i]);
395 }
396
397
398 exit(EXIT_SUCCESS);
399 }
400