/*
 * Generic EDAC defs
 *
 * Author: Dave Jiang <djiang@mvista.com>
 *
 * 2006-2008 (c) MontaVista Software, Inc. This file is licensed under
 * the terms of the GNU General Public License version 2. This program
 * is licensed "as is" without any warranty of any kind, whether express
 * or implied.
 *
 */
#ifndef _LINUX_EDAC_H_
#define _LINUX_EDAC_H_

#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/numa.h>

struct device;

#define EDAC_OPSTATE_INVAL	-1
#define EDAC_OPSTATE_POLL	0
#define EDAC_OPSTATE_NMI	1
#define EDAC_OPSTATE_INT	2

extern int edac_op_state;
extern int edac_err_assert;
extern atomic_t edac_handlers;
extern struct bus_type edac_subsys;

extern int edac_handler_set(void);
extern void edac_atomic_assert_error(void);
extern struct bus_type *edac_get_sysfs_subsys(void);
extern void edac_put_sysfs_subsys(void);

enum {
	EDAC_REPORTING_ENABLED,
	EDAC_REPORTING_DISABLED,
	EDAC_REPORTING_FORCE
};

extern int edac_report_status;
#ifdef CONFIG_EDAC
static inline int get_edac_report_status(void)
{
	return edac_report_status;
}

static inline void set_edac_report_status(int new)
{
	edac_report_status = new;
}
#else
static inline int get_edac_report_status(void)
{
	return EDAC_REPORTING_DISABLED;
}

static inline void set_edac_report_status(int new)
{
}
#endif
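
/*
 * Illustrative sketch (not part of the in-kernel API contract): a RAS
 * consumer can use the accessors above to decide whether EDAC should
 * report memory errors or leave them to platform firmware.  The enum
 * values and helpers are the ones declared in this header; the policy
 * shown is only an example.
 *
 *	switch (get_edac_report_status()) {
 *	case EDAC_REPORTING_DISABLED:
 *		break;			// firmware-first: do not report
 *	case EDAC_REPORTING_FORCE:
 *	case EDAC_REPORTING_ENABLED:
 *		// hand the error to the EDAC core for decoding/reporting
 *		break;
 *	}
 */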

static inline void opstate_init(void)
{
	switch (edac_op_state) {
	case EDAC_OPSTATE_POLL:
	case EDAC_OPSTATE_NMI:
		break;
	default:
		edac_op_state = EDAC_OPSTATE_POLL;
	}
	return;
}
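
/*
 * Illustrative sketch, not a definition from this header: EDAC drivers
 * commonly expose edac_op_state as a module parameter and then call
 * opstate_init() from their probe path so that an invalid value falls
 * back to polling.  The parameter description text and the use of
 * OP_RUNNING_POLL are assumptions for the example.
 *
 *	module_param(edac_op_state, int, 0444);
 *	MODULE_PARM_DESC(edac_op_state,
 *			 "EDAC error reporting state: 0=Poll, 1=NMI, 2=Interrupt");
 *
 *	// in the driver's probe routine, before registering the MC:
 *	opstate_init();
 *	mci->op_state = OP_RUNNING_POLL;
 */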

/* Max length of a DIMM label */
#define EDAC_MC_LABEL_LEN	31

/* Maximum size of the location string */
#define LOCATION_SIZE 256

/* Defines the maximum number of labels that can be reported */
#define EDAC_MAX_LABELS		8

/* String used to join two or more labels */
#define OTHER_LABEL " or "

/**
 * enum dev_type - describes the type of DRAM chips used on the memory stick
 * @DEV_UNKNOWN:	Can't be determined, or the MC doesn't support
 *			detecting it
 * @DEV_X1:		1 bit for data
 * @DEV_X2:		2 bits for data
 * @DEV_X4:		4 bits for data
 * @DEV_X8:		8 bits for data
 * @DEV_X16:		16 bits for data
 * @DEV_X32:		32 bits for data
 * @DEV_X64:		64 bits for data
 *
 * Typical values are x4 and x8.
 */
enum dev_type {
	DEV_UNKNOWN = 0,
	DEV_X1,
	DEV_X2,
	DEV_X4,
	DEV_X8,
	DEV_X16,
	DEV_X32,		/* Do these parts exist? */
	DEV_X64			/* Do these parts exist? */
};

#define DEV_FLAG_UNKNOWN	BIT(DEV_UNKNOWN)
#define DEV_FLAG_X1		BIT(DEV_X1)
#define DEV_FLAG_X2		BIT(DEV_X2)
#define DEV_FLAG_X4		BIT(DEV_X4)
#define DEV_FLAG_X8		BIT(DEV_X8)
#define DEV_FLAG_X16		BIT(DEV_X16)
#define DEV_FLAG_X32		BIT(DEV_X32)
#define DEV_FLAG_X64		BIT(DEV_X64)

/**
 * enum hw_event_mc_err_type - type of the detected error
 *
 * @HW_EVENT_ERR_CORRECTED:	Corrected Error - Indicates that an ECC
 *				corrected error was detected
 * @HW_EVENT_ERR_UNCORRECTED:	Uncorrected Error - Indicates an error that
 *				can't be corrected by ECC, but is not fatal
 *				(it may be in an unused memory area, or the
 *				memory controller may be able to recover from
 *				it, for example by retrying the operation).
 * @HW_EVENT_ERR_FATAL:		Fatal Error - Uncorrected error that could not
 *				be recovered.
 * @HW_EVENT_ERR_INFO:		Informational - Event reported for logging
 *				purposes only; it does not, by itself,
 *				indicate data corruption.
 */
enum hw_event_mc_err_type {
	HW_EVENT_ERR_CORRECTED,
	HW_EVENT_ERR_UNCORRECTED,
	HW_EVENT_ERR_FATAL,
	HW_EVENT_ERR_INFO,
};

static inline char *mc_event_error_type(const unsigned int err_type)
{
	switch (err_type) {
	case HW_EVENT_ERR_CORRECTED:
		return "Corrected";
	case HW_EVENT_ERR_UNCORRECTED:
		return "Uncorrected";
	case HW_EVENT_ERR_FATAL:
		return "Fatal";
	default:
	case HW_EVENT_ERR_INFO:
		return "Info";
	}
}

/**
 * enum mem_type - memory types. For a more detailed reference, please see
 *			http://en.wikipedia.org/wiki/DRAM
 *
 * @MEM_EMPTY:		Empty csrow
 * @MEM_RESERVED:	Reserved csrow type
 * @MEM_UNKNOWN:	Unknown csrow type
 * @MEM_FPM:		FPM - Fast Page Mode, used on systems up to 1995.
 * @MEM_EDO:		EDO - Extended data out, used on systems up to 1998.
 * @MEM_BEDO:		BEDO - Burst Extended data out, an EDO variant.
 * @MEM_SDR:		SDR - Single data rate SDRAM
 *			http://en.wikipedia.org/wiki/Synchronous_dynamic_random-access_memory
 *			They use 3 pins for chip select: Pins 0 and 2 are
 *			for rank 0; pins 1 and 3 are for rank 1, if the memory
 *			is dual-rank.
 * @MEM_RDR:		Registered SDR SDRAM
 * @MEM_DDR:		Double data rate SDRAM
 *			http://en.wikipedia.org/wiki/DDR_SDRAM
 * @MEM_RDDR:		Registered Double data rate SDRAM
 *			This is a variant of the DDR memories.
 *			A registered memory has a buffer inside it, hiding
 *			part of the memory details from the memory controller.
 * @MEM_RMBS:		Rambus DRAM, used on a few Pentium III/IV controllers.
 * @MEM_DDR2:		DDR2 RAM, as described at JEDEC JESD79-2F.
 *			Those memories are labeled as "PC2-" instead of "PC" to
 *			differentiate them from DDR.
 * @MEM_FB_DDR2:	Fully-Buffered DDR2, as described at JEDEC Std No. 205
 *			and JESD206.
 *			Those memories are accessed per DIMM slot, and not by
 *			a chip select signal.
 * @MEM_RDDR2:		Registered DDR2 RAM
 *			This is a variant of the DDR2 memories.
 * @MEM_XDR:		Rambus XDR
 *			It is an evolution of the original RAMBUS memories,
 *			created to compete with DDR2. It wasn't used on any
 *			x86 arch, but the cell_edac PPC memory controller uses it.
 * @MEM_DDR3:		DDR3 RAM
 * @MEM_RDDR3:		Registered DDR3 RAM
 *			This is a variant of the DDR3 memories.
 * @MEM_LRDDR3:		Load-Reduced DDR3 memory.
 * @MEM_DDR4:		Unbuffered DDR4 RAM
 * @MEM_RDDR4:		Registered DDR4 RAM
 *			This is a variant of the DDR4 memories.
 */
enum mem_type {
	MEM_EMPTY = 0,
	MEM_RESERVED,
	MEM_UNKNOWN,
	MEM_FPM,
	MEM_EDO,
	MEM_BEDO,
	MEM_SDR,
	MEM_RDR,
	MEM_DDR,
	MEM_RDDR,
	MEM_RMBS,
	MEM_DDR2,
	MEM_FB_DDR2,
	MEM_RDDR2,
	MEM_XDR,
	MEM_DDR3,
	MEM_RDDR3,
	MEM_LRDDR3,
	MEM_DDR4,
	MEM_RDDR4,
};

#define MEM_FLAG_EMPTY		BIT(MEM_EMPTY)
#define MEM_FLAG_RESERVED	BIT(MEM_RESERVED)
#define MEM_FLAG_UNKNOWN	BIT(MEM_UNKNOWN)
#define MEM_FLAG_FPM		BIT(MEM_FPM)
#define MEM_FLAG_EDO		BIT(MEM_EDO)
#define MEM_FLAG_BEDO		BIT(MEM_BEDO)
#define MEM_FLAG_SDR		BIT(MEM_SDR)
#define MEM_FLAG_RDR		BIT(MEM_RDR)
#define MEM_FLAG_DDR		BIT(MEM_DDR)
#define MEM_FLAG_RDDR		BIT(MEM_RDDR)
#define MEM_FLAG_RMBS		BIT(MEM_RMBS)
#define MEM_FLAG_DDR2		BIT(MEM_DDR2)
#define MEM_FLAG_FB_DDR2	BIT(MEM_FB_DDR2)
#define MEM_FLAG_RDDR2		BIT(MEM_RDDR2)
#define MEM_FLAG_XDR		BIT(MEM_XDR)
#define MEM_FLAG_DDR3		BIT(MEM_DDR3)
#define MEM_FLAG_RDDR3		BIT(MEM_RDDR3)

/**
 * enum edac_type - Error Detection and Correction capabilities and mode
 * @EDAC_UNKNOWN:	Unknown if ECC is available
 * @EDAC_NONE:		Doesn't support ECC
 * @EDAC_RESERVED:	Reserved ECC type
 * @EDAC_PARITY:	Detects parity errors
 * @EDAC_EC:		Error Checking - no correction
 * @EDAC_SECDED:	Single bit error correction, Double bit error detection
 * @EDAC_S2ECD2ED:	Chipkill x2 devices - do these exist?
 * @EDAC_S4ECD4ED:	Chipkill x4 devices
 * @EDAC_S8ECD8ED:	Chipkill x8 devices
 * @EDAC_S16ECD16ED:	Chipkill x16 devices
 */
enum edac_type {
	EDAC_UNKNOWN = 0,
	EDAC_NONE,
	EDAC_RESERVED,
	EDAC_PARITY,
	EDAC_EC,
	EDAC_SECDED,
	EDAC_S2ECD2ED,
	EDAC_S4ECD4ED,
	EDAC_S8ECD8ED,
	EDAC_S16ECD16ED,
};

#define EDAC_FLAG_UNKNOWN	BIT(EDAC_UNKNOWN)
#define EDAC_FLAG_NONE		BIT(EDAC_NONE)
#define EDAC_FLAG_PARITY	BIT(EDAC_PARITY)
#define EDAC_FLAG_EC		BIT(EDAC_EC)
#define EDAC_FLAG_SECDED	BIT(EDAC_SECDED)
#define EDAC_FLAG_S2ECD2ED	BIT(EDAC_S2ECD2ED)
#define EDAC_FLAG_S4ECD4ED	BIT(EDAC_S4ECD4ED)
#define EDAC_FLAG_S8ECD8ED	BIT(EDAC_S8ECD8ED)
#define EDAC_FLAG_S16ECD16ED	BIT(EDAC_S16ECD16ED)

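/*
 * Illustrative sketch (not a definition from this header): during probe,
 * a driver typically advertises what the hardware supports through the
 * MEM_FLAG_* and EDAC_FLAG_* masks in struct mem_ctl_info (declared
 * later in this file).  The specific capability values below are made
 * up for the example.
 *
 *	mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_RDDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
 *	// edac_cap is usually a subset of edac_ctl_cap, reflecting what is
 *	// actually usable with the DIMMs that are populated:
 *	mci->edac_cap = EDAC_FLAG_SECDED;
 */
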
/**
 * enum scrub_type - scrubbing capabilities
 * @SCRUB_UNKNOWN:		Unknown if scrubber is available
 * @SCRUB_NONE:			No scrubber
 * @SCRUB_SW_PROG:		SW progressive (sequential) scrubbing
 * @SCRUB_SW_SRC:		Software scrub only errors
 * @SCRUB_SW_PROG_SRC:		Progressive software scrub from an error
 * @SCRUB_SW_TUNABLE:		Software scrub frequency is tunable
 * @SCRUB_HW_PROG:		HW progressive (sequential) scrubbing
 * @SCRUB_HW_SRC:		Hardware scrub only errors
 * @SCRUB_HW_PROG_SRC:		Progressive hardware scrub from an error
 * @SCRUB_HW_TUNABLE:		Hardware scrub frequency is tunable
 */
enum scrub_type {
	SCRUB_UNKNOWN = 0,
	SCRUB_NONE,
	SCRUB_SW_PROG,
	SCRUB_SW_SRC,
	SCRUB_SW_PROG_SRC,
	SCRUB_SW_TUNABLE,
	SCRUB_HW_PROG,
	SCRUB_HW_SRC,
	SCRUB_HW_PROG_SRC,
	SCRUB_HW_TUNABLE
};

#define SCRUB_FLAG_SW_PROG	BIT(SCRUB_SW_PROG)
#define SCRUB_FLAG_SW_SRC	BIT(SCRUB_SW_SRC)
#define SCRUB_FLAG_SW_PROG_SRC	BIT(SCRUB_SW_PROG_SRC)
#define SCRUB_FLAG_SW_TUN	BIT(SCRUB_SW_TUNABLE)
#define SCRUB_FLAG_HW_PROG	BIT(SCRUB_HW_PROG)
#define SCRUB_FLAG_HW_SRC	BIT(SCRUB_HW_SRC)
#define SCRUB_FLAG_HW_PROG_SRC	BIT(SCRUB_HW_PROG_SRC)
#define SCRUB_FLAG_HW_TUN	BIT(SCRUB_HW_TUNABLE)

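/*
 * Illustrative sketch (not a definition from this header): a driver that
 * knows its chipset performs hardware scrubbing of error locations could
 * advertise that via the scrub fields of struct mem_ctl_info.  The values
 * chosen below are only an example.
 *
 *	mci->scrub_cap = SCRUB_FLAG_HW_SRC | SCRUB_FLAG_HW_TUN;
 *	mci->scrub_mode = SCRUB_HW_SRC;
 */
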
/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */

/* EDAC internal operation states */
#define OP_ALLOC		0x100
#define OP_RUNNING_POLL		0x201
#define OP_RUNNING_INTERRUPT	0x202
#define OP_RUNNING_POLL_INTR	0x203
#define OP_OFFLINE		0x300

/*
 * Concepts used at the EDAC subsystem
 *
 * There are several things to be aware of that aren't at all obvious:
 *
 * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
 *
 * These are some of the many terms that are thrown about that don't always
 * mean what people think they mean (Inconceivable!).  In the interest of
 * creating a common ground for discussion, terms and their definitions
 * will be established.
 *
 * Memory devices:	The individual DRAM chips on a memory stick.  These
 *			devices commonly output 4 and 8 bits each (x4, x8).
 *			Grouping several of these in parallel provides the
 *			number of bits that the memory controller expects:
 *			typically 72 bits, in order to provide 64 bits +
 *			8 bits of ECC data.
 *
 * Memory Stick:	A printed circuit board that aggregates multiple
 *			memory devices in parallel.  In general, this is the
 *			Field Replaceable Unit (FRU) which gets replaced, in
 *			the case of excessive errors.  Most often it is also
 *			called a DIMM (Dual Inline Memory Module).
 *
 * Memory Socket:	A physical connector on the motherboard that accepts
 *			a single memory stick.  Also called a "slot" in several
 *			datasheets.
 *
 * Channel:		A memory controller channel, responsible for
 *			communicating with a group of DIMMs.  Each channel has
 *			its own independent control (command) and data bus, and
 *			can be used independently or grouped with other channels.
 *
 * Branch:		Generally the highest level of the hierarchy on a
 *			Fully-Buffered DIMM memory controller.
 *			Typically, it contains two channels.
 *			Two channels at the same branch can be used in single
 *			mode or in lockstep mode.
 *			When lockstep is enabled, the cacheline is doubled,
 *			but it generally brings some performance penalty.
 *			Also, it is generally not possible to point to just one
 *			memory stick when an error occurs, as the error
 *			correction code is calculated using two DIMMs instead
 *			of one.  Due to that, it is capable of correcting more
 *			errors than single mode.
 *
 * Single-channel:	The data accessed by the memory controller is contained
 *			in one DIMM only.  E.g. if the data is 64 bits wide,
 *			the data flows to the CPU using one 64-bit parallel
 *			access.
 *			Typically used with SDR, DDR, DDR2 and DDR3 memories.
 *			FB-DIMM and RAMBUS use a different concept for channel,
 *			so this concept doesn't apply there.
 *
 * Double-channel:	The data size accessed by the memory controller is
 *			interleaved across two DIMMs, accessed at the same time.
 *			E.g. if the DIMM is 64 bits wide (72 bits with ECC),
 *			the data flows to the CPU using a 128-bit parallel
 *			access.
 *
 * Chip-select row:	This is the name of the DRAM signal used to select the
 *			DRAM ranks to be accessed.  Common chip-select rows for
 *			single channel are 64 bits, for dual channel 128 bits.
 *			It may not be visible to the memory controller, as some
 *			DIMM types have a memory buffer that can hide direct
 *			access to it from the Memory Controller.
 *
 * Single-Ranked stick:	A Single-ranked stick has 1 chip-select row of memory.
 *			Motherboards commonly drive two chip-select pins to
 *			a memory stick.  A single-ranked stick will occupy
 *			only one of those rows.  The other will be unused.
 *
 * Double-Ranked stick:	A double-ranked stick has two chip-select rows which
 *			access different sets of memory devices.  The two
 *			rows cannot be accessed concurrently.
 *
 * Double-sided stick:	DEPRECATED TERM, see Double-Ranked stick.
 *			A double-sided stick has two chip-select rows which
 *			access different sets of memory devices.  The two
 *			rows cannot be accessed concurrently.  "Double-sided"
 *			is irrespective of the memory devices being mounted
 *			on both sides of the memory stick.
 *
 * Socket set:		All of the memory sticks that are required for
 *			a single memory access, or all of the memory sticks
 *			spanned by a chip-select row.  A single socket set
 *			has two chip-select rows, and if double-sided sticks
 *			are used these will occupy those chip-select rows.
 *
 * Bank:		This term is avoided because it is unclear when
 *			needing to distinguish between chip-select rows and
 *			socket sets.
 *
 * Controller pages:
 *
 * Physical pages:
 *
 * Virtual pages:
 *
 *
 * STRUCTURE ORGANIZATION AND CHOICES
 *
 *
 *
 * PS - I enjoyed writing all that about as much as you enjoyed reading it.
 */

/**
 * enum edac_mc_layer_type - memory controller hierarchy layer
 *
 * @EDAC_MC_LAYER_BRANCH:	memory layer is named "branch"
 * @EDAC_MC_LAYER_CHANNEL:	memory layer is named "channel"
 * @EDAC_MC_LAYER_SLOT:		memory layer is named "slot"
 * @EDAC_MC_LAYER_CHIP_SELECT:	memory layer is named "chip select"
 * @EDAC_MC_LAYER_ALL_MEM:	memory layout is unknown. All memory is mapped
 *				as a single memory area. This is used when
 *				retrieving errors from a firmware-driven driver.
 *
 * This enum is used by the drivers to tell edac_mc_sysfs what name should
 * be used when describing a memory stick location.
 */
enum edac_mc_layer_type {
	EDAC_MC_LAYER_BRANCH,
	EDAC_MC_LAYER_CHANNEL,
	EDAC_MC_LAYER_SLOT,
	EDAC_MC_LAYER_CHIP_SELECT,
	EDAC_MC_LAYER_ALL_MEM,
};

/**
 * struct edac_mc_layer - describes the memory controller hierarchy
 * @type:		layer type
 * @size:		number of components per layer. For example,
 *			if the channel layer has two channels, size = 2
 * @is_virt_csrow:	This layer is part of the "csrow" when old API
 *			compatibility mode is enabled. Otherwise, it is
 *			a channel
 */
struct edac_mc_layer {
	enum edac_mc_layer_type	type;
	unsigned		size;
	bool			is_virt_csrow;
};

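/*
 * Illustrative sketch: a driver for a controller with 2 channels and
 * 4 DIMM slots per channel could describe its hierarchy like this and
 * pass it to the allocator.  edac_mc_alloc() is not declared in this
 * header (it lives in the EDAC core under drivers/edac); its use and
 * the private-data size of 0 are assumptions for the example.
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[0].size = 2;
 *	layers[0].is_virt_csrow = false;
 *	layers[1].type = EDAC_MC_LAYER_SLOT;
 *	layers[1].size = 4;
 *	layers[1].is_virt_csrow = true;
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
 */
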
/*
 * Maximum number of layers used by the memory controller to uniquely
 * identify a single memory stick.
 * NOTE: Changing this constant requires changing the existing core code
 * as well, as some of it is optimized for 3 layers.
 */
#define EDAC_MAX_LAYERS		3

/**
 * EDAC_DIMM_OFF - Macro to get the array offset of the element given by
 *		   the [layer0,layer1,layer2] position
 *
 * @layers:	a struct edac_mc_layer array, describing how many elements
 *		were allocated for each layer
 * @nlayers:	Number of layers at the @layers array
 * @layer0:	layer0 position
 * @layer1:	layer1 position. Unused if nlayers < 2
 * @layer2:	layer2 position. Unused if nlayers < 3
 *
 * For 1 layer, this macro returns "&var[layer0] - &var"
 * For 2 layers, this macro is similar to allocating a bi-dimensional array
 *		and returning "&var[layer0][layer1] - &var"
 * For 3 layers, this macro is similar to allocating a tri-dimensional array
 *		and returning "&var[layer0][layer1][layer2] - &var"
 *
 * A loop could be used here to make it more generic, but, as we only have
 * 3 layers, this is a little faster.
 * By design, nlayers can never be 0 or more than 3. If that ever happens,
 * -EINVAL is returned (and EDAC_DIMM_PTR then yields NULL), causing an
 * OOPS during the memory allocation routine, which would point the
 * developer to the mistake.
 */
#define EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2) ({		\
	int __i;							\
	if ((nlayers) == 1)						\
		__i = layer0;						\
	else if ((nlayers) == 2)					\
		__i = (layer1) + ((layers[1]).size * (layer0));		\
	else if ((nlayers) == 3)					\
		__i = (layer2) + ((layers[2]).size * ((layer1) +	\
			    ((layers[1]).size * (layer0))));		\
	else								\
		__i = -EINVAL;						\
	__i;								\
})

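/*
 * Worked example (illustrative): with nlayers = 2, layers[1].size = 4
 * (say, 4 slots per channel), channel layer0 = 1 and slot layer1 = 2,
 * EDAC_DIMM_OFF() evaluates to 2 + 4 * 1 = 6, i.e. the 7th element of a
 * flat array laid out as [channel][slot].
 */
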
/**
 * EDAC_DIMM_PTR - Macro to get a pointer inside a pointer array
 *		   for the element given by the [layer0,layer1,layer2] position
 *
 * @layers:	a struct edac_mc_layer array, describing how many elements
 *		were allocated for each layer
 * @var:	name of the var where we want to get the pointer
 *		(like mci->dimms)
 * @nlayers:	Number of layers at the @layers array
 * @layer0:	layer0 position
 * @layer1:	layer1 position. Unused if nlayers < 2
 * @layer2:	layer2 position. Unused if nlayers < 3
 *
 * For 1 layer, this macro returns "var[layer0]"
 * For 2 layers, this macro is similar to allocating a bi-dimensional array
 *		and returning "var[layer0][layer1]"
 * For 3 layers, this macro is similar to allocating a tri-dimensional array
 *		and returning "var[layer0][layer1][layer2]"
 */
#define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({	\
	typeof(*var) __p;						\
	int ___i = EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2);	\
	if (___i < 0)							\
		__p = NULL;						\
	else								\
		__p = (var)[___i];					\
	__p;								\
})

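/*
 * Illustrative sketch: iterating over a 2-layer [channel][slot] hierarchy
 * and fetching each DIMM descriptor from mci->dimms.  The layer geometry
 * (2 channels, 4 slots) is made up for the example; mci and layers are
 * assumed to come from the allocation sketch shown earlier.
 *
 *	int chan, slot;
 *	struct dimm_info *dimm;
 *
 *	for (chan = 0; chan < 2; chan++) {
 *		for (slot = 0; slot < 4; slot++) {
 *			dimm = EDAC_DIMM_PTR(layers, mci->dimms, 2,
 *					     chan, slot, 0);
 *			if (dimm)
 *				dimm->nr_pages = 0;	// e.g. mark as empty
 *		}
 *	}
 */
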
struct dimm_info {
	struct device dev;

	char label[EDAC_MC_LABEL_LEN + 1];	/* DIMM label on motherboard */

	/* Memory location data */
	unsigned location[EDAC_MAX_LAYERS];

	struct mem_ctl_info *mci;	/* the parent */

	u32 grain;		/* granularity of reported error in bytes */
	enum dev_type dtype;	/* memory device type */
	enum mem_type mtype;	/* memory dimm type */
	enum edac_type edac_mode;	/* EDAC mode for this dimm */

	u32 nr_pages;			/* number of pages on this dimm */

	unsigned csrow, cschannel;	/* Points to the old API data */
};

/**
 * struct rank_info - contains the information for one DIMM rank
 *
 * @chan_idx:	channel number where the rank is (typically, 0 or 1)
 * @ce_count:	number of correctable errors for this rank
 * @csrow:	A pointer to the chip select row structure (the parent
 *		structure). The location of the rank is given by
 *		the (csrow->csrow_idx, chan_idx) vector.
 * @dimm:	A pointer to the DIMM structure, where the DIMM label
 *		information is stored.
 *
 * FIXME: Currently, the EDAC core model will assume one DIMM per rank.
 *	  This is a bad assumption, but it makes this patch easier. Later
 *	  patches in this series will fix this issue.
 */
struct rank_info {
	int chan_idx;
	struct csrow_info *csrow;
	struct dimm_info *dimm;

	u32 ce_count;		/* Correctable Errors for this csrow */
};

struct csrow_info {
	struct device dev;

	/* Used only by edac_mc_find_csrow_by_page() */
	unsigned long first_page;	/* first page number in csrow */
	unsigned long last_page;	/* last page number in csrow */
	unsigned long page_mask;	/* used for interleaving -
					 * 0UL for non intlv */

	int csrow_idx;			/* the chip-select row */

	u32 ue_count;		/* Uncorrectable Errors for this csrow */
	u32 ce_count;		/* Correctable Errors for this csrow */

	struct mem_ctl_info *mci;	/* the parent */

	/* channel information for this csrow */
	u32 nr_channels;
	struct rank_info **channels;
};

/*
 * struct errcount_attribute_data - used to store the various error counts
 */
struct errcount_attribute_data {
	int n_layers;
	int pos[EDAC_MAX_LAYERS];
	int layer0, layer1, layer2;
};

/**
 * struct edac_raw_error_desc - Raw error report structure
 * @grain:			minimum granularity for an error report, in bytes
 * @error_count:		number of errors of the same type
 * @top_layer:			top layer of the error (layer[0])
 * @mid_layer:			middle layer of the error (layer[1])
 * @low_layer:			low layer of the error (layer[2])
 * @page_frame_number:		page where the error happened
 * @offset_in_page:		page offset
 * @syndrome:			syndrome of the error (or 0 if unknown or if
 *				the syndrome is not applicable)
 * @msg:			error message
 * @location:			location of the error
 * @label:			label of the affected DIMM(s)
 * @other_detail:		other driver-specific detail about the error
 * @enable_per_layer_report:	if false, the error affects all layers
 *				(typically, a memory controller error)
 */
struct edac_raw_error_desc {
	/*
	 * NOTE: everything before grain won't be cleaned by
	 * edac_raw_error_desc_clean()
	 */
	char location[LOCATION_SIZE];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS];
	long grain;

	/* the vars below and grain will be cleaned on every new error report */
	u16 error_count;
	int top_layer;
	int mid_layer;
	int low_layer;
	unsigned long page_frame_number;
	unsigned long offset_in_page;
	unsigned long syndrome;
	const char *msg;
	const char *other_detail;
	bool enable_per_layer_report;
};

/* Memory controller information structure */
struct mem_ctl_info {
	struct device			dev;
	struct bus_type			*bus;

	struct list_head link;	/* for global list of mem_ctl_info structs */

	struct module *owner;	/* Module owner of this control struct */

	unsigned long mtype_cap;	/* memory types supported by mc */
	unsigned long edac_ctl_cap;	/* Mem controller EDAC capabilities */
	unsigned long edac_cap;	/* configuration capabilities - this is
				 * closely related to edac_ctl_cap.  The
				 * difference is that the controller may be
				 * capable of s4ecd4ed which would be listed
				 * in edac_ctl_cap, but if channels aren't
				 * capable of s4ecd4ed then edac_cap would
				 * not have that capability.
				 */
	unsigned long scrub_cap;	/* chipset scrub capabilities */
	enum scrub_type scrub_mode;	/* current scrub mode */

	/* Translates an sdram memory scrub rate given in bytes/sec to the
	   internal representation and configures whatever else needs
	   to be configured.
	 */
	int (*set_sdram_scrub_rate) (struct mem_ctl_info *mci, u32 bw);

	/* Gets the current sdram memory scrub rate from the internal
	   representation and converts it to the closest matching
	   bandwidth in bytes/sec.
	 */
	int (*get_sdram_scrub_rate) (struct mem_ctl_info *mci);


	/* pointer to edac checking routine */
	void (*edac_check) (struct mem_ctl_info *mci);

	/*
	 * Remaps memory pages: controller pages to physical pages.
	 * For most MCs, this will be NULL.
	 */
	/* FIXME - why not send the phys page to begin with? */
	unsigned long (*ctl_page_to_phys) (struct mem_ctl_info *mci,
					   unsigned long page);
	int mc_idx;
	struct csrow_info **csrows;
	unsigned nr_csrows, num_cschannel;

	/*
	 * Memory Controller hierarchy
	 *
	 * There are basically two types of memory controller: the ones that
	 * see memory sticks ("dimms"), and the ones that see memory ranks.
	 * All old memory controllers enumerate memory per rank, but most
	 * of the recent drivers enumerate memory per DIMM instead.
	 * When the memory controller is per rank, csbased is true.
	 */
	unsigned n_layers;
	struct edac_mc_layer *layers;
	bool csbased;

	/*
	 * DIMM info. The csrow-based information will eventually be removed.
	 */
	unsigned tot_dimms;
	struct dimm_info **dimms;

	/*
	 * FIXME - what about controllers on other busses? - IDs must be
	 * unique.  dev pointer should be sufficiently unique, but
	 * BUS:SLOT.FUNC numbers may not be unique.
	 */
	struct device *pdev;
	const char *mod_name;
	const char *mod_ver;
	const char *ctl_name;
	const char *dev_name;
	void *pvt_info;
	unsigned long start_time;	/* mci load start time (in jiffies) */

	/*
	 * Drivers shouldn't access these fields directly, as the core
	 * already handles that.
	 */
	u32 ce_noinfo_count, ue_noinfo_count;
	u32 ue_mc, ce_mc;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];

	struct completion complete;

	/* Additional top-level controller attributes, specified by the
	 * low level driver.
	 *
	 * Set by the low level driver to provide attributes at the
	 * controller level.
	 * An array of structures, NULL terminated.
	 *
	 * If attributes are desired, then set to an array of attributes.
	 * If no attributes are desired, leave NULL.
	 */
	const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;

	/* work struct for this MC */
	struct delayed_work work;

	/*
	 * Used to report an error - keeping it inside the global struct
	 * means the memory is allocated by the EDAC core.
	 */
	struct edac_raw_error_desc error_desc;

	/* the internal state of this controller instance */
	int op_state;

	struct dentry *debugfs;
	u8 fake_inject_layer[EDAC_MAX_LAYERS];
	bool fake_inject_ue;
	u16 fake_inject_count;
};

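/*
 * Illustrative sketch: wiring the scrub-rate hooks declared in
 * struct mem_ctl_info above.  The function names, the register handling
 * and the conversion performed are hypothetical; only the hook
 * signatures and the bytes/sec convention come from this header.
 *
 *	static int example_set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
 *	{
 *		// convert bw (bytes/sec) to the chipset's register encoding,
 *		// write it to the hardware, then return the rate actually set
 *		return bw;
 *	}
 *
 *	static int example_get_scrub_rate(struct mem_ctl_info *mci)
 *	{
 *		// read the chipset register and convert it back to bytes/sec
 *		return 0;
 *	}
 *
 *	// in the probe routine:
 *	mci->set_sdram_scrub_rate = example_set_scrub_rate;
 *	mci->get_sdram_scrub_rate = example_get_scrub_rate;
 */
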
/*
 * Maximum number of memory controllers in the coherent fabric.
 */
#define EDAC_MAX_MCS	(2 * MAX_NUMNODES)

#endif /* _LINUX_EDAC_H_ */