• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * edac_mc kernel module
3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
9  *	http://www.anime.net/~goemon/linux-ecc/
10  *
11  * Modified by Dave Peterson and Doug Thompson
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/proc_fs.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/sysctl.h>
22 #include <linux/highmem.h>
23 #include <linux/timer.h>
24 #include <linux/slab.h>
25 #include <linux/jiffies.h>
26 #include <linux/spinlock.h>
27 #include <linux/list.h>
28 #include <linux/ctype.h>
29 #include <linux/edac.h>
30 #include <asm/uaccess.h>
31 #include <asm/page.h>
32 #include <asm/edac.h>
33 #include "edac_core.h"
34 #include "edac_module.h"
35 
/*
 * mem_ctls_mutex serializes access to mc_devices, the global list of
 * registered memory controllers.
 */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);
39 
40 #ifdef CONFIG_EDAC_DEBUG
41 
edac_mc_dump_channel(struct rank_info * chan)42 static void edac_mc_dump_channel(struct rank_info *chan)
43 {
44 	debugf4("\tchannel = %p\n", chan);
45 	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
46 	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
47 	debugf4("\tchannel->label = '%s'\n", chan->label);
48 	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
49 }
50 
edac_mc_dump_csrow(struct csrow_info * csrow)51 static void edac_mc_dump_csrow(struct csrow_info *csrow)
52 {
53 	debugf4("\tcsrow = %p\n", csrow);
54 	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
55 	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
56 	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
57 	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
58 	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
59 	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
60 	debugf4("\tcsrow->channels = %p\n", csrow->channels);
61 	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
62 }
63 
edac_mc_dump_mci(struct mem_ctl_info * mci)64 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
65 {
66 	debugf3("\tmci = %p\n", mci);
67 	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
68 	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
69 	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
70 	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
71 	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
72 		mci->nr_csrows, mci->csrows);
73 	debugf3("\tdev = %p\n", mci->dev);
74 	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
75 	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
76 }
77 
78 #endif				/* CONFIG_EDAC_DEBUG */
79 
/*
 * Human-readable names of the memory types.  The table is indexed by
 * enum mem_type, so the order of the entries must be kept in sync with
 * that enum.
 */
const char *edac_mem_types[] = {
	"Empty csrow",				/*  0 */
	"Reserved csrow type",			/*  1 */
	"Unknown csrow type",			/*  2 */
	"Fast page mode RAM",			/*  3 */
	"Extended data out RAM",		/*  4 */
	"Burst Extended data out RAM",		/*  5 */
	"Single data rate SDRAM",		/*  6 */
	"Registered single data rate SDRAM",	/*  7 */
	"Double data rate SDRAM",		/*  8 */
	"Registered Double data rate SDRAM",	/*  9 */
	"Rambus DRAM",				/* 10 */
	"Unbuffered DDR2 RAM",			/* 11 */
	"Fully buffered DDR2",			/* 12 */
	"Registered DDR2 RAM",			/* 13 */
	"Rambus XDR",				/* 14 */
	"Unbuffered DDR3 RAM",			/* 15 */
	"Registered DDR3 RAM",			/* 16 */
};
102 EXPORT_SYMBOL_GPL(edac_mem_types);
103 
/*
 * edac_align_ptr - round a pointer up to the alignment an item needs
 * @ptr:  possibly unaligned address at which an item X is to be placed
 * @size: sizeof(X)
 *
 * Adjusts 'ptr' so that its alignment is at least as stringent as what
 * the compiler would provide for X, and returns the aligned result.  The
 * required alignment is derived from 'size': the smallest of short, int,
 * long and long long that is not smaller than the item.
 *
 * If 'size' is a constant, the compiler can fold the alignment choice
 * away, leaving only the rounding of the address.
 *
 * Returns 'ptr' itself when it is already suitably aligned (or when the
 * item is a single byte), otherwise the next aligned address above it.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned long addr = (unsigned long)ptr;
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;

	/*
	 * The amount of padding depends on where the pointer currently is,
	 * not on how big the item is: take the remainder of the address.
	 */
	r = addr % align;
	if (r == 0)
		return ptr;

	return (void *)(addr + align - r);
}
137 
138 /**
139  * edac_mc_alloc: Allocate a struct mem_ctl_info structure
140  * @size_pvt:	size of private storage needed
141  * @nr_csrows:	Number of CWROWS needed for this MC
142  * @nr_chans:	Number of channels for the MC
143  *
144  * Everything is kmalloc'ed as one big chunk - more efficient.
145  * Only can be used if all structures have the same lifetime - otherwise
146  * you have to allocate and initialize your own structures.
147  *
148  * Use edac_mc_free() to free mc structures allocated by this function.
149  *
150  * Returns:
151  *	NULL allocation failed
152  *	struct mem_ctl_info pointer
153  */
edac_mc_alloc(unsigned sz_pvt,unsigned nr_csrows,unsigned nr_chans,int edac_index)154 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
155 				unsigned nr_chans, int edac_index)
156 {
157 	struct mem_ctl_info *mci;
158 	struct csrow_info *csi, *csrow;
159 	struct rank_info *chi, *chp, *chan;
160 	void *pvt;
161 	unsigned size;
162 	int row, chn;
163 	int err;
164 
165 	/* Figure out the offsets of the various items from the start of an mc
166 	 * structure.  We want the alignment of each item to be at least as
167 	 * stringent as what the compiler would provide if we could simply
168 	 * hardcode everything into a single struct.
169 	 */
170 	mci = (struct mem_ctl_info *)0;
171 	csi = edac_align_ptr(&mci[1], sizeof(*csi));
172 	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
173 	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
174 	size = ((unsigned long)pvt) + sz_pvt;
175 
176 	mci = kzalloc(size, GFP_KERNEL);
177 	if (mci == NULL)
178 		return NULL;
179 
180 	/* Adjust pointers so they point within the memory we just allocated
181 	 * rather than an imaginary chunk of memory located at address 0.
182 	 */
183 	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
184 	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
185 	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
186 
187 	/* setup index and various internal pointers */
188 	mci->mc_idx = edac_index;
189 	mci->csrows = csi;
190 	mci->pvt_info = pvt;
191 	mci->nr_csrows = nr_csrows;
192 
193 	for (row = 0; row < nr_csrows; row++) {
194 		csrow = &csi[row];
195 		csrow->csrow_idx = row;
196 		csrow->mci = mci;
197 		csrow->nr_channels = nr_chans;
198 		chp = &chi[row * nr_chans];
199 		csrow->channels = chp;
200 
201 		for (chn = 0; chn < nr_chans; chn++) {
202 			chan = &chp[chn];
203 			chan->chan_idx = chn;
204 			chan->csrow = csrow;
205 		}
206 	}
207 
208 	mci->op_state = OP_ALLOC;
209 	INIT_LIST_HEAD(&mci->grp_kobj_list);
210 
211 	/*
212 	 * Initialize the 'root' kobj for the edac_mc controller
213 	 */
214 	err = edac_mc_register_sysfs_main_kobj(mci);
215 	if (err) {
216 		kfree(mci);
217 		return NULL;
218 	}
219 
220 	/* at this point, the root kobj is valid, and in order to
221 	 * 'free' the object, then the function:
222 	 *      edac_mc_unregister_sysfs_main_kobj() must be called
223 	 * which will perform kobj unregistration and the actual free
224 	 * will occur during the kobject callback operation
225 	 */
226 	return mci;
227 }
228 EXPORT_SYMBOL_GPL(edac_mc_alloc);
229 
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * Unregisters the controller's main sysfs kobject and then releases
 * the memory of the 'mci' instance.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	/* drop the sysfs root object before the memory goes away */
	edac_mc_unregister_sysfs_main_kobj(mci);

	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
245 
246 
247 /**
248  * find_mci_by_dev
249  *
250  *	scan list of controllers looking for the one that manages
251  *	the 'dev' device
252  * @dev: pointer to a struct device related with the MCI
253  */
find_mci_by_dev(struct device * dev)254 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
255 {
256 	struct mem_ctl_info *mci;
257 	struct list_head *item;
258 
259 	debugf3("%s()\n", __func__);
260 
261 	list_for_each(item, &mc_devices) {
262 		mci = list_entry(item, struct mem_ctl_info, link);
263 
264 		if (mci->dev == dev)
265 			return mci;
266 	}
267 
268 	return NULL;
269 }
270 EXPORT_SYMBOL_GPL(find_mci_by_dev);
271 
272 /*
273  * handler for EDAC to check if NMI type handler has asserted interrupt
274  */
edac_mc_assert_error_check_and_clear(void)275 static int edac_mc_assert_error_check_and_clear(void)
276 {
277 	int old_state;
278 
279 	if (edac_op_state == EDAC_OPSTATE_POLL)
280 		return 1;
281 
282 	old_state = edac_err_assert;
283 	edac_err_assert = 0;
284 
285 	return old_state;
286 }
287 
288 /*
289  * edac_mc_workq_function
290  *	performs the operation scheduled by a workq request
291  */
edac_mc_workq_function(struct work_struct * work_req)292 static void edac_mc_workq_function(struct work_struct *work_req)
293 {
294 	struct delayed_work *d_work = to_delayed_work(work_req);
295 	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
296 
297 	mutex_lock(&mem_ctls_mutex);
298 
299 	/* if this control struct has movd to offline state, we are done */
300 	if (mci->op_state == OP_OFFLINE) {
301 		mutex_unlock(&mem_ctls_mutex);
302 		return;
303 	}
304 
305 	/* Only poll controllers that are running polled and have a check */
306 	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
307 		mci->edac_check(mci);
308 
309 	mutex_unlock(&mem_ctls_mutex);
310 
311 	/* Reschedule */
312 	queue_delayed_work(edac_workqueue, &mci->work,
313 			msecs_to_jiffies(edac_mc_get_poll_msec()));
314 }
315 
316 /*
317  * edac_mc_workq_setup
318  *	initialize a workq item for this mci
319  *	passing in the new delay period in msec
320  *
321  *	locking model:
322  *
323  *		called with the mem_ctls_mutex held
324  */
edac_mc_workq_setup(struct mem_ctl_info * mci,unsigned msec)325 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
326 {
327 	debugf0("%s()\n", __func__);
328 
329 	/* if this instance is not in the POLL state, then simply return */
330 	if (mci->op_state != OP_RUNNING_POLL)
331 		return;
332 
333 	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
334 	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
335 }
336 
337 /*
338  * edac_mc_workq_teardown
339  *	stop the workq processing on this mci
340  *
341  *	locking model:
342  *
343  *		called WITHOUT lock held
344  */
edac_mc_workq_teardown(struct mem_ctl_info * mci)345 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
346 {
347 	int status;
348 
349 	if (mci->op_state != OP_RUNNING_POLL)
350 		return;
351 
352 	status = cancel_delayed_work(&mci->work);
353 	if (status == 0) {
354 		debugf0("%s() not canceled, flush the queue\n",
355 			__func__);
356 
357 		/* workq instance might be running, wait for it */
358 		flush_workqueue(edac_workqueue);
359 	}
360 }
361 
362 /*
363  * edac_mc_reset_delay_period(unsigned long value)
364  *
365  *	user space has updated our poll period value, need to
366  *	reset our workq delays
367  */
edac_mc_reset_delay_period(int value)368 void edac_mc_reset_delay_period(int value)
369 {
370 	struct mem_ctl_info *mci;
371 	struct list_head *item;
372 
373 	mutex_lock(&mem_ctls_mutex);
374 
375 	/* scan the list and turn off all workq timers, doing so under lock
376 	 */
377 	list_for_each(item, &mc_devices) {
378 		mci = list_entry(item, struct mem_ctl_info, link);
379 
380 		if (mci->op_state == OP_RUNNING_POLL)
381 			cancel_delayed_work(&mci->work);
382 	}
383 
384 	mutex_unlock(&mem_ctls_mutex);
385 
386 
387 	/* re-walk the list, and reset the poll delay */
388 	mutex_lock(&mem_ctls_mutex);
389 
390 	list_for_each(item, &mc_devices) {
391 		mci = list_entry(item, struct mem_ctl_info, link);
392 
393 		edac_mc_workq_setup(mci, (unsigned long) value);
394 	}
395 
396 	mutex_unlock(&mem_ctls_mutex);
397 }
398 
399 
400 
401 /* Return 0 on success, 1 on failure.
402  * Before calling this function, caller must
403  * assign a unique value to mci->mc_idx.
404  *
405  *	locking model:
406  *
407  *		called with the mem_ctls_mutex lock held
408  */
add_mc_to_global_list(struct mem_ctl_info * mci)409 static int add_mc_to_global_list(struct mem_ctl_info *mci)
410 {
411 	struct list_head *item, *insert_before;
412 	struct mem_ctl_info *p;
413 
414 	insert_before = &mc_devices;
415 
416 	p = find_mci_by_dev(mci->dev);
417 	if (unlikely(p != NULL))
418 		goto fail0;
419 
420 	list_for_each(item, &mc_devices) {
421 		p = list_entry(item, struct mem_ctl_info, link);
422 
423 		if (p->mc_idx >= mci->mc_idx) {
424 			if (unlikely(p->mc_idx == mci->mc_idx))
425 				goto fail1;
426 
427 			insert_before = item;
428 			break;
429 		}
430 	}
431 
432 	list_add_tail_rcu(&mci->link, insert_before);
433 	atomic_inc(&edac_handlers);
434 	return 0;
435 
436 fail0:
437 	edac_printk(KERN_WARNING, EDAC_MC,
438 		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
439 		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
440 	return 1;
441 
442 fail1:
443 	edac_printk(KERN_WARNING, EDAC_MC,
444 		"bug in low-level driver: attempt to assign\n"
445 		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
446 	return 1;
447 }
448 
del_mc_from_global_list(struct mem_ctl_info * mci)449 static void del_mc_from_global_list(struct mem_ctl_info *mci)
450 {
451 	atomic_dec(&edac_handlers);
452 	list_del_rcu(&mci->link);
453 
454 	/* these are for safe removal of devices from global list while
455 	 * NMI handlers may be traversing list
456 	 */
457 	synchronize_rcu();
458 	INIT_LIST_HEAD(&mci->link);
459 }
460 
461 /**
462  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
463  *
464  * If found, return a pointer to the structure.
465  * Else return NULL.
466  *
467  * Caller must hold mem_ctls_mutex.
468  */
edac_mc_find(int idx)469 struct mem_ctl_info *edac_mc_find(int idx)
470 {
471 	struct list_head *item;
472 	struct mem_ctl_info *mci;
473 
474 	list_for_each(item, &mc_devices) {
475 		mci = list_entry(item, struct mem_ctl_info, link);
476 
477 		if (mci->mc_idx >= idx) {
478 			if (mci->mc_idx == idx)
479 				return mci;
480 
481 			break;
482 		}
483 	}
484 
485 	return NULL;
486 }
487 EXPORT_SYMBOL(edac_mc_find);
488 
489 /**
490  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
491  *                 create sysfs entries associated with mci structure
492  * @mci: pointer to the mci structure to be added to the list
493  * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
494  *
495  * Return:
496  *	0	Success
497  *	!0	Failure
498  */
499 
500 /* FIXME - should a warning be printed if no error detection? correction? */
edac_mc_add_mc(struct mem_ctl_info * mci)501 int edac_mc_add_mc(struct mem_ctl_info *mci)
502 {
503 	debugf0("%s()\n", __func__);
504 
505 #ifdef CONFIG_EDAC_DEBUG
506 	if (edac_debug_level >= 3)
507 		edac_mc_dump_mci(mci);
508 
509 	if (edac_debug_level >= 4) {
510 		int i;
511 
512 		for (i = 0; i < mci->nr_csrows; i++) {
513 			int j;
514 
515 			edac_mc_dump_csrow(&mci->csrows[i]);
516 			for (j = 0; j < mci->csrows[i].nr_channels; j++)
517 				edac_mc_dump_channel(&mci->csrows[i].
518 						channels[j]);
519 		}
520 	}
521 #endif
522 	mutex_lock(&mem_ctls_mutex);
523 
524 	if (add_mc_to_global_list(mci))
525 		goto fail0;
526 
527 	/* set load time so that error rate can be tracked */
528 	mci->start_time = jiffies;
529 
530 	if (edac_create_sysfs_mci_device(mci)) {
531 		edac_mc_printk(mci, KERN_WARNING,
532 			"failed to create sysfs device\n");
533 		goto fail1;
534 	}
535 
536 	/* If there IS a check routine, then we are running POLLED */
537 	if (mci->edac_check != NULL) {
538 		/* This instance is NOW RUNNING */
539 		mci->op_state = OP_RUNNING_POLL;
540 
541 		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
542 	} else {
543 		mci->op_state = OP_RUNNING_INTERRUPT;
544 	}
545 
546 	/* Report action taken */
547 	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
548 		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
549 
550 	mutex_unlock(&mem_ctls_mutex);
551 	return 0;
552 
553 fail1:
554 	del_mc_from_global_list(mci);
555 
556 fail0:
557 	mutex_unlock(&mem_ctls_mutex);
558 	return 1;
559 }
560 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
561 
562 /**
563  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
564  *                 remove mci structure from global list
565  * @pdev: Pointer to 'struct device' representing mci structure to remove.
566  *
567  * Return pointer to removed mci structure, or NULL if device not found.
568  */
edac_mc_del_mc(struct device * dev)569 struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
570 {
571 	struct mem_ctl_info *mci;
572 
573 	debugf0("%s()\n", __func__);
574 
575 	mutex_lock(&mem_ctls_mutex);
576 
577 	/* find the requested mci struct in the global list */
578 	mci = find_mci_by_dev(dev);
579 	if (mci == NULL) {
580 		mutex_unlock(&mem_ctls_mutex);
581 		return NULL;
582 	}
583 
584 	del_mc_from_global_list(mci);
585 	mutex_unlock(&mem_ctls_mutex);
586 
587 	/* flush workq processes */
588 	edac_mc_workq_teardown(mci);
589 
590 	/* marking MCI offline */
591 	mci->op_state = OP_OFFLINE;
592 
593 	/* remove from sysfs */
594 	edac_remove_sysfs_mci_device(mci);
595 
596 	edac_printk(KERN_INFO, EDAC_MC,
597 		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
598 		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
599 
600 	return mci;
601 }
602 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
603 
/*
 * Software-scrub 'size' bytes starting at byte 'offset' inside the page
 * with page frame number 'page'.  The page is temporarily mapped with
 * kmap_atomic() and handed to the architecture's atomic_scrub().
 * Page frame numbers for which pfn_valid() fails are silently ignored.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	unsigned long irq_flags = 0;
	struct page *pg;
	void *kaddr;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	/* for highmem pages, interrupts stay off while the page is mapped */
	if (PageHighMem(pg))
		local_irq_save(irq_flags);

	kaddr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(kaddr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(kaddr);

	if (PageHighMem(pg))
		local_irq_restore(irq_flags);
}
634 
635 /* FIXME - should return -1 */
edac_mc_find_csrow_by_page(struct mem_ctl_info * mci,unsigned long page)636 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
637 {
638 	struct csrow_info *csrows = mci->csrows;
639 	int row, i;
640 
641 	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
642 	row = -1;
643 
644 	for (i = 0; i < mci->nr_csrows; i++) {
645 		struct csrow_info *csrow = &csrows[i];
646 
647 		if (csrow->nr_pages == 0)
648 			continue;
649 
650 		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
651 			"mask(0x%lx)\n", mci->mc_idx, __func__,
652 			csrow->first_page, page, csrow->last_page,
653 			csrow->page_mask);
654 
655 		if ((page >= csrow->first_page) &&
656 		    (page <= csrow->last_page) &&
657 		    ((page & csrow->page_mask) ==
658 		     (csrow->first_page & csrow->page_mask))) {
659 			row = i;
660 			break;
661 		}
662 	}
663 
664 	if (row == -1)
665 		edac_mc_printk(mci, KERN_ERR,
666 			"could not look up page error address %lx\n",
667 			(unsigned long)page);
668 
669 	return row;
670 }
671 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
672 
673 /* FIXME - setable log (warning/emerg) levels */
674 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
edac_mc_handle_ce(struct mem_ctl_info * mci,unsigned long page_frame_number,unsigned long offset_in_page,unsigned long syndrome,int row,int channel,const char * msg)675 void edac_mc_handle_ce(struct mem_ctl_info *mci,
676 		unsigned long page_frame_number,
677 		unsigned long offset_in_page, unsigned long syndrome,
678 		int row, int channel, const char *msg)
679 {
680 	unsigned long remapped_page;
681 
682 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
683 
684 	/* FIXME - maybe make panic on INTERNAL ERROR an option */
685 	if (row >= mci->nr_csrows || row < 0) {
686 		/* something is wrong */
687 		edac_mc_printk(mci, KERN_ERR,
688 			"INTERNAL ERROR: row out of range "
689 			"(%d >= %d)\n", row, mci->nr_csrows);
690 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
691 		return;
692 	}
693 
694 	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
695 		/* something is wrong */
696 		edac_mc_printk(mci, KERN_ERR,
697 			"INTERNAL ERROR: channel out of range "
698 			"(%d >= %d)\n", channel,
699 			mci->csrows[row].nr_channels);
700 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
701 		return;
702 	}
703 
704 	if (edac_mc_get_log_ce())
705 		/* FIXME - put in DIMM location */
706 		edac_mc_printk(mci, KERN_WARNING,
707 			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
708 			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
709 			page_frame_number, offset_in_page,
710 			mci->csrows[row].grain, syndrome, row, channel,
711 			mci->csrows[row].channels[channel].label, msg);
712 
713 	mci->ce_count++;
714 	mci->csrows[row].ce_count++;
715 	mci->csrows[row].channels[channel].ce_count++;
716 
717 	if (mci->scrub_mode & SCRUB_SW_SRC) {
718 		/*
719 		 * Some MC's can remap memory so that it is still available
720 		 * at a different address when PCI devices map into memory.
721 		 * MC's that can't do this lose the memory where PCI devices
722 		 * are mapped.  This mapping is MC dependent and so we call
723 		 * back into the MC driver for it to map the MC page to
724 		 * a physical (CPU) page which can then be mapped to a virtual
725 		 * page - which can then be scrubbed.
726 		 */
727 		remapped_page = mci->ctl_page_to_phys ?
728 			mci->ctl_page_to_phys(mci, page_frame_number) :
729 			page_frame_number;
730 
731 		edac_mc_scrub_block(remapped_page, offset_in_page,
732 				mci->csrows[row].grain);
733 	}
734 }
735 EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
736 
edac_mc_handle_ce_no_info(struct mem_ctl_info * mci,const char * msg)737 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
738 {
739 	if (edac_mc_get_log_ce())
740 		edac_mc_printk(mci, KERN_WARNING,
741 			"CE - no information available: %s\n", msg);
742 
743 	mci->ce_noinfo_count++;
744 	mci->ce_count++;
745 }
746 EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
747 
edac_mc_handle_ue(struct mem_ctl_info * mci,unsigned long page_frame_number,unsigned long offset_in_page,int row,const char * msg)748 void edac_mc_handle_ue(struct mem_ctl_info *mci,
749 		unsigned long page_frame_number,
750 		unsigned long offset_in_page, int row, const char *msg)
751 {
752 	int len = EDAC_MC_LABEL_LEN * 4;
753 	char labels[len + 1];
754 	char *pos = labels;
755 	int chan;
756 	int chars;
757 
758 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
759 
760 	/* FIXME - maybe make panic on INTERNAL ERROR an option */
761 	if (row >= mci->nr_csrows || row < 0) {
762 		/* something is wrong */
763 		edac_mc_printk(mci, KERN_ERR,
764 			"INTERNAL ERROR: row out of range "
765 			"(%d >= %d)\n", row, mci->nr_csrows);
766 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
767 		return;
768 	}
769 
770 	chars = snprintf(pos, len + 1, "%s",
771 			 mci->csrows[row].channels[0].label);
772 	len -= chars;
773 	pos += chars;
774 
775 	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
776 		chan++) {
777 		chars = snprintf(pos, len + 1, ":%s",
778 				 mci->csrows[row].channels[chan].label);
779 		len -= chars;
780 		pos += chars;
781 	}
782 
783 	if (edac_mc_get_log_ue())
784 		edac_mc_printk(mci, KERN_EMERG,
785 			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
786 			"labels \"%s\": %s\n", page_frame_number,
787 			offset_in_page, mci->csrows[row].grain, row,
788 			labels, msg);
789 
790 	if (edac_mc_get_panic_on_ue())
791 		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
792 			"row %d, labels \"%s\": %s\n", mci->mc_idx,
793 			page_frame_number, offset_in_page,
794 			mci->csrows[row].grain, row, labels, msg);
795 
796 	mci->ue_count++;
797 	mci->csrows[row].ue_count++;
798 }
799 EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
800 
edac_mc_handle_ue_no_info(struct mem_ctl_info * mci,const char * msg)801 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
802 {
803 	if (edac_mc_get_panic_on_ue())
804 		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
805 
806 	if (edac_mc_get_log_ue())
807 		edac_mc_printk(mci, KERN_WARNING,
808 			"UE - no information available: %s\n", msg);
809 	mci->ue_noinfo_count++;
810 	mci->ue_count++;
811 }
812 EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
813 
814 /*************************************************************
815  * On Fully Buffered DIMM modules, this help function is
816  * called to process UE events
817  */
edac_mc_handle_fbd_ue(struct mem_ctl_info * mci,unsigned int csrow,unsigned int channela,unsigned int channelb,char * msg)818 void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
819 			unsigned int csrow,
820 			unsigned int channela,
821 			unsigned int channelb, char *msg)
822 {
823 	int len = EDAC_MC_LABEL_LEN * 4;
824 	char labels[len + 1];
825 	char *pos = labels;
826 	int chars;
827 
828 	if (csrow >= mci->nr_csrows) {
829 		/* something is wrong */
830 		edac_mc_printk(mci, KERN_ERR,
831 			"INTERNAL ERROR: row out of range (%d >= %d)\n",
832 			csrow, mci->nr_csrows);
833 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
834 		return;
835 	}
836 
837 	if (channela >= mci->csrows[csrow].nr_channels) {
838 		/* something is wrong */
839 		edac_mc_printk(mci, KERN_ERR,
840 			"INTERNAL ERROR: channel-a out of range "
841 			"(%d >= %d)\n",
842 			channela, mci->csrows[csrow].nr_channels);
843 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
844 		return;
845 	}
846 
847 	if (channelb >= mci->csrows[csrow].nr_channels) {
848 		/* something is wrong */
849 		edac_mc_printk(mci, KERN_ERR,
850 			"INTERNAL ERROR: channel-b out of range "
851 			"(%d >= %d)\n",
852 			channelb, mci->csrows[csrow].nr_channels);
853 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
854 		return;
855 	}
856 
857 	mci->ue_count++;
858 	mci->csrows[csrow].ue_count++;
859 
860 	/* Generate the DIMM labels from the specified channels */
861 	chars = snprintf(pos, len + 1, "%s",
862 			 mci->csrows[csrow].channels[channela].label);
863 	len -= chars;
864 	pos += chars;
865 	chars = snprintf(pos, len + 1, "-%s",
866 			 mci->csrows[csrow].channels[channelb].label);
867 
868 	if (edac_mc_get_log_ue())
869 		edac_mc_printk(mci, KERN_EMERG,
870 			"UE row %d, channel-a= %d channel-b= %d "
871 			"labels \"%s\": %s\n", csrow, channela, channelb,
872 			labels, msg);
873 
874 	if (edac_mc_get_panic_on_ue())
875 		panic("UE row %d, channel-a= %d channel-b= %d "
876 			"labels \"%s\": %s\n", csrow, channela,
877 			channelb, labels, msg);
878 }
879 EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
880 
881 /*************************************************************
882  * On Fully Buffered DIMM modules, this help function is
883  * called to process CE events
884  */
edac_mc_handle_fbd_ce(struct mem_ctl_info * mci,unsigned int csrow,unsigned int channel,char * msg)885 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
886 			unsigned int csrow, unsigned int channel, char *msg)
887 {
888 
889 	/* Ensure boundary values */
890 	if (csrow >= mci->nr_csrows) {
891 		/* something is wrong */
892 		edac_mc_printk(mci, KERN_ERR,
893 			"INTERNAL ERROR: row out of range (%d >= %d)\n",
894 			csrow, mci->nr_csrows);
895 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
896 		return;
897 	}
898 	if (channel >= mci->csrows[csrow].nr_channels) {
899 		/* something is wrong */
900 		edac_mc_printk(mci, KERN_ERR,
901 			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
902 			channel, mci->csrows[csrow].nr_channels);
903 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
904 		return;
905 	}
906 
907 	if (edac_mc_get_log_ce())
908 		/* FIXME - put in DIMM location */
909 		edac_mc_printk(mci, KERN_WARNING,
910 			"CE row %d, channel %d, label \"%s\": %s\n",
911 			csrow, channel,
912 			mci->csrows[csrow].channels[channel].label, msg);
913 
914 	mci->ce_count++;
915 	mci->csrows[csrow].ce_count++;
916 	mci->csrows[csrow].channels[channel].ce_count++;
917 }
918 EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
919