• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * /dev/mcelog driver
3  *
4  * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
5  * Rest from unknown author(s).
6  * 2004 Andi Kleen. Rewrote most of it.
7  * Copyright 2008 Intel Corporation
8  * Author: Andi Kleen
9  */
10 
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 
13 #include <linux/miscdevice.h>
14 #include <linux/slab.h>
15 #include <linux/kmod.h>
16 #include <linux/poll.h>
17 
18 #include "mce-internal.h"
19 
/* Chain of MCE injectors (e.g. mce-inject); invoked from mce_chrdev_write(). */
static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);

/* Serializes /dev/mcelog readers against the logging notifier. */
static DEFINE_MUTEX(mce_chrdev_read_mutex);

/* Path of the user mode helper program run on a machine check event. */
static char mce_helper[128];
/* argv for call_usermodehelper(): just the helper path, NULL-terminated. */
static char *mce_helper_argv[2] = { mce_helper, NULL };
26 
/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

/* Legacy fixed-size log buffer exposed to user space via /dev/mcelog. */
static struct mce_log_buffer mcelog = {
	.signature	= MCE_LOG_SIGNATURE,
	.len		= MCE_LOG_LEN,
	.recordlen	= sizeof(struct mce),
};

/* Readers of /dev/mcelog sleep here until a new record is logged. */
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);

/* User mode helper program triggered by machine check event */
/*
 * NOTE(review): mce_helper is already defined 'static' above; this extern
 * re-declaration is redundant (it resolves to the same internal-linkage
 * object) and could be dropped.
 */
extern char			mce_helper[128];
43 
dev_mce_log(struct notifier_block * nb,unsigned long val,void * data)44 static int dev_mce_log(struct notifier_block *nb, unsigned long val,
45 				void *data)
46 {
47 	struct mce *mce = (struct mce *)data;
48 	unsigned int entry;
49 
50 	mutex_lock(&mce_chrdev_read_mutex);
51 
52 	entry = mcelog.next;
53 
54 	/*
55 	 * When the buffer fills up discard new entries. Assume that the
56 	 * earlier errors are the more interesting ones:
57 	 */
58 	if (entry >= MCE_LOG_LEN) {
59 		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
60 		goto unlock;
61 	}
62 
63 	mcelog.next = entry + 1;
64 
65 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
66 	mcelog.entry[entry].finished = 1;
67 
68 	/* wake processes polling /dev/mcelog */
69 	wake_up_interruptible(&mce_chrdev_wait);
70 
71 unlock:
72 	mutex_unlock(&mce_chrdev_read_mutex);
73 
74 	return NOTIFY_OK;
75 }
76 
/* Hooks dev_mce_log() into the MCE decode chain at mcelog priority. */
static struct notifier_block dev_mcelog_nb = {
	.notifier_call	= dev_mce_log,
	.priority	= MCE_PRIO_MCELOG,
};
81 
/*
 * Workqueue callback: spawn the configured user mode helper.
 * Fire-and-forget (UMH_NO_WAIT): the helper's exit status is not collected.
 */
static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
88 
89 
mce_work_trigger(void)90 void mce_work_trigger(void)
91 {
92 	if (mce_helper[0])
93 		schedule_work(&mce_trigger_work);
94 }
95 
96 static ssize_t
show_trigger(struct device * s,struct device_attribute * attr,char * buf)97 show_trigger(struct device *s, struct device_attribute *attr, char *buf)
98 {
99 	strcpy(buf, mce_helper);
100 	strcat(buf, "\n");
101 	return strlen(mce_helper) + 1;
102 }
103 
set_trigger(struct device * s,struct device_attribute * attr,const char * buf,size_t siz)104 static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
105 				const char *buf, size_t siz)
106 {
107 	char *p;
108 
109 	strncpy(mce_helper, buf, sizeof(mce_helper));
110 	mce_helper[sizeof(mce_helper)-1] = 0;
111 	p = strchr(mce_helper, '\n');
112 
113 	if (p)
114 		*p = 0;
115 
116 	return strlen(mce_helper) + !!p;
117 }
118 
/* Sysfs attribute 'trigger': helper path, readable by all, root-writable. */
DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
120 
/*
 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
 */

/* Protects the open bookkeeping counters below. */
static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;	/* #times opened */
static int mce_chrdev_open_exclu;	/* already open exclusive? */
128 
mce_chrdev_open(struct inode * inode,struct file * file)129 static int mce_chrdev_open(struct inode *inode, struct file *file)
130 {
131 	spin_lock(&mce_chrdev_state_lock);
132 
133 	if (mce_chrdev_open_exclu ||
134 	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
135 		spin_unlock(&mce_chrdev_state_lock);
136 
137 		return -EBUSY;
138 	}
139 
140 	if (file->f_flags & O_EXCL)
141 		mce_chrdev_open_exclu = 1;
142 	mce_chrdev_open_count++;
143 
144 	spin_unlock(&mce_chrdev_state_lock);
145 
146 	return nonseekable_open(inode, file);
147 }
148 
/* Release /dev/mcelog: drop one open reference, clear any exclusive claim. */
static int mce_chrdev_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	mce_chrdev_open_count--;
	mce_chrdev_open_exclu = 0;

	spin_unlock(&mce_chrdev_state_lock);

	return 0;
}
160 
/* Set once APEI/ERST has no more records (or errored); stops further reads. */
static int mce_apei_read_done;
162 
163 /* Collect MCE record of previous boot in persistent storage via APEI ERST. */
/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
	struct mce m;
	u64 record_id;
	int rc;

	if (usize < sizeof(struct mce))
		return -EINVAL;

	rc = apei_read_mce(&m, &record_id);
	/* Error or no more MCE record */
	if (rc <= 0) {
		mce_apei_read_done = 1;
		/*
		 * When ERST is disabled, mce_chrdev_read() should return
		 * "no record" instead of "no device."
		 */
		return (rc == -ENODEV) ? 0 : rc;
	}

	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
		return -EFAULT;

	/*
	 * Ideally the record would be cleared only after it has been
	 * flushed to disk or sent over the network by /sbin/mcelog, but
	 * there is no interface for that, so clear it now to avoid
	 * handing out duplicates.
	 */
	rc = apei_clear_mce(record_id);
	if (rc) {
		mce_apei_read_done = 1;
		return rc;
	}

	/* Advance the caller's user buffer past the copied record. */
	*ubuf += sizeof(struct mce);

	return 0;
}
203 
/*
 * Read /dev/mcelog. Records persisted by a previous boot (APEI/ERST)
 * are drained first, one per read; after that, only full-buffer reads
 * of the in-memory log are supported, and reading consumes the log.
 */
static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
				size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned next;
	int i, err;

	mutex_lock(&mce_chrdev_read_mutex);

	if (!mce_apei_read_done) {
		err = __mce_read_apei(&buf, usize);
		/* APEI error, or one APEI record copied: return it alone. */
		if (err || buf != ubuf)
			goto out;
	}

	/* Only supports full reads right now */
	err = -EINVAL;
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
		goto out;

	next = mcelog.next;
	err = 0;

	for (i = 0; i < next; i++) {
		struct mce *m = &mcelog.entry[i];

		/* Accumulate uncopied-byte counts; any remainder => -EFAULT below. */
		err |= copy_to_user(buf, m, sizeof(*m));
		buf += sizeof(*m);
	}

	/* Reading consumes the log: wipe the entries and reset the cursor. */
	memset(mcelog.entry, 0, next * sizeof(struct mce));
	mcelog.next = 0;

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mce_chrdev_read_mutex);

	/* On success return the number of bytes actually produced. */
	return err ? err : buf - ubuf;
}
245 
/*
 * Poll /dev/mcelog: readable when the in-memory log has records, or
 * when APEI/ERST still holds records from a previous boot.
 */
static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_chrdev_wait, wait);

	if (READ_ONCE(mcelog.next) ||
	    (!mce_apei_read_done && apei_check_mce()))
		return POLLIN | POLLRDNORM;

	return 0;
}
255 
/*
 * ioctl interface for /dev/mcelog (CAP_SYS_ADMIN only):
 *   MCE_GET_RECORD_LEN  - size of one struct mce record
 *   MCE_GET_LOG_LEN     - number of slots in the log buffer
 *   MCE_GETCLEAR_FLAGS  - atomically fetch and clear the status flags
 */
static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
				unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		/* cmpxchg loop: read-and-zero without losing concurrent sets */
		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}
282 
/* Register an MCE injector; it will be called from mce_chrdev_write(). */
void mce_register_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_register(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_injector_chain);
288 
/* Remove a previously registered MCE injector from the chain. */
void mce_unregister_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);
294 
mce_chrdev_write(struct file * filp,const char __user * ubuf,size_t usize,loff_t * off)295 static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
296 				size_t usize, loff_t *off)
297 {
298 	struct mce m;
299 
300 	if (!capable(CAP_SYS_ADMIN))
301 		return -EPERM;
302 	/*
303 	 * There are some cases where real MSR reads could slip
304 	 * through.
305 	 */
306 	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
307 		return -EIO;
308 
309 	if ((unsigned long)usize > sizeof(struct mce))
310 		usize = sizeof(struct mce);
311 	if (copy_from_user(&m, ubuf, usize))
312 		return -EFAULT;
313 
314 	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
315 		return -EINVAL;
316 
317 	/*
318 	 * Need to give user space some time to set everything up,
319 	 * so do it a jiffie or two later everywhere.
320 	 */
321 	schedule_timeout(2);
322 
323 	blocking_notifier_call_chain(&mce_injector_chain, 0, &m);
324 
325 	return usize;
326 }
327 
/* File operations backing the /dev/mcelog character device. */
static const struct file_operations mce_chrdev_ops = {
	.open			= mce_chrdev_open,
	.release		= mce_chrdev_release,
	.read			= mce_chrdev_read,
	.write			= mce_chrdev_write,
	.poll			= mce_chrdev_poll,
	.unlocked_ioctl		= mce_chrdev_ioctl,
	.llseek			= no_llseek,
};
337 
338 static struct miscdevice mce_chrdev_device = {
339 	MISC_MCELOG_MINOR,
340 	"mcelog",
341 	&mce_chrdev_ops,
342 };
343 
dev_mcelog_init_device(void)344 static __init int dev_mcelog_init_device(void)
345 {
346 	int err;
347 
348 	/* register character device /dev/mcelog */
349 	err = misc_register(&mce_chrdev_device);
350 	if (err) {
351 		if (err == -EBUSY)
352 			/* Xen dom0 might have registered the device already. */
353 			pr_info("Unable to init device /dev/mcelog, already registered");
354 		else
355 			pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
356 
357 		return err;
358 	}
359 
360 	mce_register_decode_chain(&dev_mcelog_nb);
361 	return 0;
362 }
363 device_initcall_sync(dev_mcelog_init_device);
364