• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21 
22 #include <linux/module.h>
23 #include <linux/aio.h>
24 #include <linux/mm.h>
25 #include <linux/swap.h>
26 #include <linux/slab.h>
27 #include <linux/sysctl.h>
28 #include <linux/bitmap.h>
29 #include <linux/signal.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/fs.h>
36 #include <linux/init.h>
37 #include <linux/kernel.h>
38 #include <linux/kobject.h>
39 #include <linux/net.h>
40 #include <linux/sysrq.h>
41 #include <linux/highuid.h>
42 #include <linux/writeback.h>
43 #include <linux/ratelimit.h>
44 #include <linux/compaction.h>
45 #include <linux/hugetlb.h>
46 #include <linux/initrd.h>
47 #include <linux/key.h>
48 #include <linux/times.h>
49 #include <linux/limits.h>
50 #include <linux/dcache.h>
51 #include <linux/dnotify.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/kprobes.h>
60 #include <linux/pipe_fs_i.h>
61 #include <linux/oom.h>
62 #include <linux/kmod.h>
63 #include <linux/capability.h>
64 #include <linux/binfmts.h>
65 #include <linux/sched/sysctl.h>
66 #include <linux/sched/coredump.h>
67 #include <linux/kexec.h>
68 #include <linux/bpf.h>
69 #include <linux/mount.h>
70 #include <linux/userfaultfd_k.h>
71 #include <linux/coredump.h>
72 #include <linux/latencytop.h>
73 #include <linux/pid.h>
74 
75 #include "../lib/kstrtox.h"
76 
77 #include <linux/uaccess.h>
78 #include <asm/processor.h>
79 
80 #ifdef CONFIG_X86
81 #include <asm/nmi.h>
82 #include <asm/stacktrace.h>
83 #include <asm/io.h>
84 #endif
85 #ifdef CONFIG_SPARC
86 #include <asm/setup.h>
87 #endif
88 #ifdef CONFIG_BSD_PROCESS_ACCT
89 #include <linux/acct.h>
90 #endif
91 #ifdef CONFIG_RT_MUTEXES
92 #include <linux/rtmutex.h>
93 #endif
94 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
95 #include <linux/lockdep.h>
96 #endif
97 #ifdef CONFIG_CHR_DEV_SG
98 #include <scsi/sg.h>
99 #endif
100 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
101 #include <linux/stackleak.h>
102 #endif
103 #ifdef CONFIG_LOCKUP_DETECTOR
104 #include <linux/nmi.h>
105 #endif
106 
107 #if defined(CONFIG_SYSCTL)
108 
109 /* External variables not in a header file. */
110 extern int extra_free_kbytes;
111 
112 /* Constants used for minimum and  maximum */
113 #ifdef CONFIG_LOCKUP_DETECTOR
114 static int sixty = 60;
115 #endif
116 
117 static unsigned long zero_ul;
118 static unsigned long one_ul = 1;
119 static unsigned long long_max = LONG_MAX;
120 #ifdef CONFIG_PRINTK
121 static int ten_thousand = 10000;
122 #endif
123 #ifdef CONFIG_PERF_EVENTS
124 static int six_hundred_forty_kb = 640 * 1024;
125 #endif
126 
127 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
128 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
129 
130 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
131 static int maxolduid = 65535;
132 static int minolduid;
133 
134 static int ngroups_max = NGROUPS_MAX;
135 static const int cap_last_cap = CAP_LAST_CAP;
136 
137 /*
138  * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
139  * and hung_task_check_interval_secs
140  */
141 #ifdef CONFIG_DETECT_HUNG_TASK
142 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
143 #endif
144 
145 #ifdef CONFIG_INOTIFY_USER
146 #include <linux/inotify.h>
147 #endif
148 
149 #ifdef CONFIG_PROC_SYSCTL
150 
/**
 * enum sysctl_writes_mode - how the file position affects sysctl writes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall must carry the complete value;
 *	repeated writes on one file descriptor overwrite the value whatever
 *	the file position is, and a non-zero initial position is not
 *	reported.
 * @SYSCTL_WRITES_WARN: behaves like the legacy mode, but a warning is
 *	issued when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric entries must be written at file position 0
 *	with the whole value in the buffer handed to the write syscall.
 *	String entries honour the file position, limited to the maximum
 *	length of the buffer; anything past that length is ignored and
 *	successive writes append to the buffer.
 *
 * The selected mode decides how the current file position influences each
 * update of a sysctl value made through the proc interface.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};

/* Write mode currently in force; strict unless changed. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
177 #endif /* CONFIG_PROC_SYSCTL */
178 
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
int sysctl_legacy_va_layout;
#endif

#ifdef CONFIG_SCHED_DEBUG
/* Scheduler granularity bounds, in nanoseconds. */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
/* Wakeup granularity bounds, in nanoseconds. */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
#ifdef CONFIG_SMP
/* First and last valid tunable-scaling selectors. */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

#ifdef CONFIG_COMPACTION
/* Bounds for the external fragmentation threshold. */
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif
199 
200 #endif /* CONFIG_SYSCTL */
201 
202 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
/*
 * sysctl handler for a 0/1 switch backed by the static key stored in
 * @table->data.
 *
 * The value is parsed into a local through proc_dointvec_minmax() with
 * bounds SYSCTL_ZERO..SYSCTL_ONE. When a successful write changes the value
 * last accepted, the static key is incremented (non-zero) or decremented
 * (zero). Writers need CAP_SYS_ADMIN. Everything after the capability check
 * runs under bpf_stats_enabled_mutex.
 *
 * Returns -EPERM for an unprivileged write, otherwise the result of
 * proc_dointvec_minmax().
 */
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	/* Value accepted by the previous successful write. */
	static int saved_val;
	struct static_key *stats_key = (struct static_key *)table->data;
	int val, ret;
	struct ctl_table shadow = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);
	val = saved_val;
	ret = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);

	/* Reads, failed writes and unchanged values leave the key alone. */
	if (!write || ret || val == saved_val)
		goto unlock;

	if (val)
		static_key_slow_inc(stats_key);
	else
		static_key_slow_dec(stats_key);
	saved_val = val;

unlock:
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}
233 
unpriv_ebpf_notify(int new_state)234 void __weak unpriv_ebpf_notify(int new_state)
235 {
236 }
237 
/*
 * sysctl handler for the integer stored in @table->data.
 *
 * The value is parsed into a local copy through proc_dointvec_minmax() using
 * the bounds of @table, and only committed to @table->data after a
 * successful write. Once the stored value is 1 it is treated as locked: a
 * write of anything other than 1 is refused with -EPERM. Writers need
 * CAP_SYS_ADMIN.
 *
 * unpriv_ebpf_notify() is invoked for write requests only; reading the
 * sysctl must not generate a notification.
 *
 * Returns -EPERM for an unprivileged write or an attempt to leave the
 * locked state, otherwise the result of proc_dointvec_minmax().
 */
static int bpf_unpriv_handler(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Parse into the local copy so a rejected value is never stored. */
	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	/* Only a write request is reported; a read is not a state change. */
	if (write)
		unpriv_ebpf_notify(unpriv_enable);

	return ret;
}
260 #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
261 
262 /*
263  * /proc/sys support
264  */
265 
266 #ifdef CONFIG_PROC_SYSCTL
267 
/*
 * Copy a string between the kernel buffer @data (capacity @maxlen) and
 * @buffer.
 *
 * Write: characters are taken from @buffer until a NUL, a newline, *@lenp
 * bytes or the capacity of @data is reached; @data is always terminated. In
 * strict write mode the copy starts at offset *@ppos inside @data (and is
 * skipped entirely when *@ppos lies beyond the current string), otherwise it
 * starts at offset 0. *@ppos is advanced by *@lenp.
 *
 * Read: the string is copied starting at *@ppos, a newline is appended when
 * there is room, and *@lenp / *@ppos are updated with the amount produced.
 *
 * Always returns 0.
 */
static int _proc_do_string(char *data, int maxlen, int write,
		char *buffer, size_t *lenp, loff_t *ppos)
{
	size_t len;
	char *src;
	char ch;

	if (!data || !maxlen || !*lenp) {
		*lenp = 0;
		return 0;
	}

	if (!write) {
		len = strlen(data);
		if (len > maxlen)
			len = maxlen;

		/* Nothing left to hand out past the end of the string. */
		if (*ppos > len) {
			*lenp = 0;
			return 0;
		}

		data += *ppos;
		len  -= *ppos;

		if (len > *lenp)
			len = *lenp;
		if (len)
			memcpy(buffer, data, len);
		if (len < *lenp) {
			buffer[len] = '\n';
			len++;
		}
		*lenp = len;
		*ppos += len;
		return 0;
	}

	if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
		/* Only continue writes not past the end of buffer. */
		len = strlen(data);
		if (len > maxlen - 1)
			len = maxlen - 1;

		if (*ppos > len)
			return 0;
		len = *ppos;
	} else {
		/* Start writing from beginning of buffer. */
		len = 0;
	}

	*ppos += *lenp;
	for (src = buffer; (src - buffer) < *lenp && len < maxlen - 1; ) {
		ch = *(src++);
		if (ch == 0 || ch == '\n')
			break;
		data[len++] = ch;
	}
	data[len] = 0;

	return 0;
}
329 
warn_sysctl_write(struct ctl_table * table)330 static void warn_sysctl_write(struct ctl_table *table)
331 {
332 	pr_warn_once("%s wrote to %s when file position was not 0!\n"
333 		"This will not be supported in the future. To silence this\n"
334 		"warning, set kernel.sysctl_writes_strict = -1\n",
335 		current->comm, table->procname);
336 }
337 
338 /**
339  * proc_first_pos_non_zero_ignore - check if first position is allowed
340  * @ppos: file position
341  * @table: the sysctl table
342  *
343  * Returns true if the first position is non-zero and the sysctl_writes_strict
344  * mode indicates this is not allowed for numeric input types. String proc
345  * handlers can ignore the return value.
346  */
proc_first_pos_non_zero_ignore(loff_t * ppos,struct ctl_table * table)347 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
348 					   struct ctl_table *table)
349 {
350 	if (!*ppos)
351 		return false;
352 
353 	switch (sysctl_writes_strict) {
354 	case SYSCTL_WRITES_STRICT:
355 		return true;
356 	case SYSCTL_WRITES_WARN:
357 		warn_sysctl_write(table);
358 		return false;
359 	default:
360 		return false;
361 	}
362 }
363 
364 /**
365  * proc_dostring - read a string sysctl
366  * @table: the sysctl table
367  * @write: %TRUE if this is a write to the sysctl file
368  * @buffer: the user buffer
369  * @lenp: the size of the user buffer
370  * @ppos: file position
371  *
372  * Reads/writes a string from/to the user buffer. If the kernel
373  * buffer provided is not large enough to hold the string, the
374  * string is truncated. The copied string is %NULL-terminated.
375  * If the string is being read by the user process, it is copied
376  * and a newline '\n' is added. It is truncated if the buffer is
377  * not large enough.
378  *
379  * Returns 0 on success.
380  */
proc_dostring(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)381 int proc_dostring(struct ctl_table *table, int write,
382 		  void *buffer, size_t *lenp, loff_t *ppos)
383 {
384 	if (write)
385 		proc_first_pos_non_zero_ignore(ppos, table);
386 
387 	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
388 			ppos);
389 }
390 
/*
 * Advance *@buf past leading whitespace, consuming at most *@size bytes.
 * *@size is reduced by the number of bytes skipped.
 */
static void proc_skip_spaces(char **buf, size_t *size)
{
	for (; *size && isspace(**buf); (*buf)++)
		(*size)--;
}
400 
/*
 * Advance *@buf past leading occurrences of the character @v, consuming at
 * most *@size bytes. *@size is reduced by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
410 
411 /**
412  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
413  *                   fail on overflow
414  *
415  * @cp: kernel buffer containing the string to parse
416  * @endp: pointer to store the trailing characters
417  * @base: the base to use
418  * @res: where the parsed integer will be stored
419  *
420  * In case of success 0 is returned and @res will contain the parsed integer,
421  * @endp will hold any trailing characters.
422  * This function will fail the parse on overflow. If there wasn't an overflow
423  * the function will defer the decision what characters count as invalid to the
424  * caller.
425  */
strtoul_lenient(const char * cp,char ** endp,unsigned int base,unsigned long * res)426 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
427 			   unsigned long *res)
428 {
429 	unsigned long long result;
430 	unsigned int rv;
431 
432 	cp = _parse_integer_fixup_radix(cp, &base);
433 	rv = _parse_integer(cp, base, &result);
434 	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
435 		return -ERANGE;
436 
437 	cp += rv;
438 
439 	if (endp)
440 		*endp = (char *)cp;
441 
442 	*res = (unsigned long)result;
443 	return 0;
444 }
445 
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * On success %0 is returned and @buf and @size are advanced past the bytes
 * that were consumed. If @tr is non-NULL and a trailing character exists
 * (size is non-zero after returning from this function), @tr receives that
 * character. -EINVAL is returned for an empty buffer, a missing digit, an
 * overflowing or overlong number, or a trailer that is not in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char scratch[TMPBUFLEN];
	char *cur = scratch;
	ssize_t len = *size;
	bool has_trailer;

	if (len <= 0)
		return -EINVAL;

	/* Work on a bounded, NUL-terminated copy of the input. */
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;
	memcpy(scratch, *buf, len);
	scratch[len] = 0;

	/* A lone '-' is not a sign; it then fails the digit check below. */
	*neg = (*cur == '-' && *size > 1);
	if (*neg)
		cur++;

	if (!isdigit(*cur))
		return -EINVAL;

	if (strtoul_lenient(cur, &cur, 0, val))
		return -EINVAL;

	len = cur - scratch;

	/*
	 * The copy may have been cut short, so the character following a
	 * number that fills it is unknown: it could make the integer invalid
	 * (e.g. 1234...a) or begin a second one (e.g. 123...1). Refuse
	 * numbers that large.
	 */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	has_trailer = len < *size;

	if (has_trailer && perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
		return -EINVAL;

	if (tr && has_trailer)
		*tr = *cur;

	*buf += len;
	*size -= len;

	return 0;
}
510 
511 /**
512  * proc_put_long - converts an integer to a decimal ASCII formatted string
513  *
514  * @buf: the user buffer
515  * @size: the size of the user buffer
516  * @val: the integer to be converted
517  * @neg: sign of the number, %TRUE for negative
518  *
519  * In case of success @buf and @size are updated with the amount of bytes
520  * written.
521  */
proc_put_long(void ** buf,size_t * size,unsigned long val,bool neg)522 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
523 {
524 	int len;
525 	char tmp[TMPBUFLEN], *p = tmp;
526 
527 	sprintf(p, "%s%lu", neg ? "-" : "", val);
528 	len = strlen(tmp);
529 	if (len > *size)
530 		len = *size;
531 	memcpy(*buf, tmp, len);
532 	*size -= len;
533 	*buf += len;
534 }
535 #undef TMPBUFLEN
536 
/*
 * Store the character @c at *@buf and advance *@buf by one byte, reducing
 * *@size accordingly. Does nothing when *@size is zero.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *dst;

	if (!*size)
		return;

	dst = *buf;
	*dst = c;
	(*size)--;
	*buf = dst + 1;
}
548 
/*
 * Default conversion between the sign/magnitude pair (@negp, @lvalp) used by
 * the text parser and the int stored at @valp.
 *
 * Write: stores the signed value, returning -EINVAL when the magnitude does
 * not fit an int. Read: splits the stored int into sign and magnitude.
 * @data is unused.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	int cur;

	if (!write) {
		cur = READ_ONCE(*valp);
		if (cur < 0) {
			*negp = true;
			/* Negate as unsigned so INT_MIN is handled too. */
			*lvalp = -(unsigned long)cur;
		} else {
			*negp = false;
			*lvalp = (unsigned long)cur;
		}
		return 0;
	}

	if (*negp) {
		/* One more magnitude is allowed on the negative side. */
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		WRITE_ONCE(*valp, -*lvalp);
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
575 
/*
 * Default conversion between the parsed value @lvalp and the unsigned int
 * stored at @valp.
 *
 * Write: stores the value, returning -EINVAL when it exceeds UINT_MAX.
 * Read: copies the stored value into *@lvalp. @data is unused.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	unsigned int cur;

	if (!write) {
		cur = READ_ONCE(*valp);
		*lvalp = (unsigned long)cur;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
590 
591 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
592 
__do_proc_dointvec(void * tbl_data,struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos,int (* conv)(bool * negp,unsigned long * lvalp,int * valp,int write,void * data),void * data)593 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
594 		  int write, void *buffer,
595 		  size_t *lenp, loff_t *ppos,
596 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
597 			      int write, void *data),
598 		  void *data)
599 {
600 	int *i, vleft, first = 1, err = 0;
601 	size_t left;
602 	char *p;
603 
604 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
605 		*lenp = 0;
606 		return 0;
607 	}
608 
609 	i = (int *) tbl_data;
610 	vleft = table->maxlen / sizeof(*i);
611 	left = *lenp;
612 
613 	if (!conv)
614 		conv = do_proc_dointvec_conv;
615 
616 	if (write) {
617 		if (proc_first_pos_non_zero_ignore(ppos, table))
618 			goto out;
619 
620 		if (left > PAGE_SIZE - 1)
621 			left = PAGE_SIZE - 1;
622 		p = buffer;
623 	}
624 
625 	for (; left && vleft--; i++, first=0) {
626 		unsigned long lval;
627 		bool neg;
628 
629 		if (write) {
630 			proc_skip_spaces(&p, &left);
631 
632 			if (!left)
633 				break;
634 			err = proc_get_long(&p, &left, &lval, &neg,
635 					     proc_wspace_sep,
636 					     sizeof(proc_wspace_sep), NULL);
637 			if (err)
638 				break;
639 			if (conv(&neg, &lval, i, 1, data)) {
640 				err = -EINVAL;
641 				break;
642 			}
643 		} else {
644 			if (conv(&neg, &lval, i, 0, data)) {
645 				err = -EINVAL;
646 				break;
647 			}
648 			if (!first)
649 				proc_put_char(&buffer, &left, '\t');
650 			proc_put_long(&buffer, &left, lval, neg);
651 		}
652 	}
653 
654 	if (!write && !first && left && !err)
655 		proc_put_char(&buffer, &left, '\n');
656 	if (write && !err && left)
657 		proc_skip_spaces(&p, &left);
658 	if (write && first)
659 		return err ? : -EINVAL;
660 	*lenp -= left;
661 out:
662 	*ppos += *lenp;
663 	return err;
664 }
665 
/*
 * Run __do_proc_dointvec() on the data area of @table itself, forwarding
 * every other argument unchanged.
 */
static int do_proc_dointvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	void *values = table->data;

	return __do_proc_dointvec(values, table, write, buffer, lenp, ppos,
				  conv, data);
}
675 
/*
 * Write path for a single unsigned int: parse one non-negative number from
 * @buffer and store it at @tbl_data through @conv.
 *
 * At most PAGE_SIZE - 1 bytes of input are examined. Returns -EINVAL when
 * the input is empty, malformed, negative or rejected by @conv. When
 * proc_first_pos_non_zero_ignore() asks for the write to be ignored, *@ppos
 * is advanced by *@lenp and 0 is returned. On a successful store neither
 * *@lenp nor *@ppos is modified.
 */
static int do_proc_douintvec_w(unsigned int *tbl_data,
			       struct ctl_table *table,
			       void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	unsigned long lval;
	size_t left = *lenp;
	char *p = buffer;
	bool neg;

	if (proc_first_pos_non_zero_ignore(ppos, table)) {
		/* This is in keeping with old __do_proc_dointvec() */
		*ppos += *lenp;
		return 0;
	}

	if (left > PAGE_SIZE - 1)
		left = PAGE_SIZE - 1;

	proc_skip_spaces(&p, &left);
	if (!left)
		return -EINVAL;

	/* @neg is only inspected once the parse itself has succeeded. */
	if (proc_get_long(&p, &left, &lval, &neg,
			  proc_wspace_sep,
			  sizeof(proc_wspace_sep), NULL) || neg)
		return -EINVAL;

	if (conv(&lval, tbl_data, 1, data))
		return -EINVAL;

	if (left)
		proc_skip_spaces(&p, &left);

	return 0;
}
732 
/*
 * Read path for a single unsigned int: fetch the value at @tbl_data through
 * @conv and print it in decimal, followed by a newline when room remains.
 *
 * *@lenp is set to the number of bytes produced and *@ppos is advanced by
 * it. Returns -EINVAL when @conv fails, 0 otherwise.
 */
static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	unsigned long lval;
	size_t left = *lenp;
	int err = 0;

	if (conv(&lval, tbl_data, 0, data)) {
		err = -EINVAL;
	} else {
		proc_put_long(&buffer, &left, lval, false);
		if (left)
			proc_put_char(&buffer, &left, '\n');
	}

	*lenp -= left;
	*ppos += *lenp;

	return err;
}
763 
/*
 * Read or write the single unsigned int at @tbl_data as ASCII.
 *
 * Tables whose maxlen does not describe exactly one unsigned int are
 * rejected with -EINVAL. @conv defaults to do_proc_douintvec_conv() when
 * NULL, and @data is passed through to it. Reads at a non-zero *@ppos
 * produce nothing.
 */
static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
			       int write, void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	unsigned int *valp = (unsigned int *) tbl_data;
	unsigned int count;

	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	/*
	 * Arrays are not supported, keep this simple. *Do not* add
	 * support for them.
	 */
	count = table->maxlen / sizeof(*valp);
	if (count != 1) {
		*lenp = 0;
		return -EINVAL;
	}

	if (!conv)
		conv = do_proc_douintvec_conv;

	return write ?
		do_proc_douintvec_w(valp, table, buffer, lenp, ppos,
				    conv, data) :
		do_proc_douintvec_r(valp, buffer, lenp, ppos, conv, data);
}
799 
/*
 * Run __do_proc_douintvec() on the data area of @table itself, forwarding
 * every other argument unchanged.
 */
static int do_proc_douintvec(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos,
			     int (*conv)(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data),
			     void *data)
{
	void *value = table->data;

	return __do_proc_douintvec(value, table, write, buffer, lenp, ppos,
				   conv, data);
}
810 
811 /**
812  * proc_dointvec - read a vector of integers
813  * @table: the sysctl table
814  * @write: %TRUE if this is a write to the sysctl file
815  * @buffer: the user buffer
816  * @lenp: the size of the user buffer
817  * @ppos: file position
818  *
819  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
820  * values from/to the user buffer, treated as an ASCII string.
821  *
822  * Returns 0 on success.
823  */
proc_dointvec(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)824 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
825 		  size_t *lenp, loff_t *ppos)
826 {
827 	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
828 }
829 
830 #ifdef CONFIG_COMPACTION
/*
 * proc_dointvec_minmax() variant that, when CONFIG_PREEMPT_RT is enabled,
 * prints a one-time warning naming the entry and the writing task if a
 * successful write changed the stored value. Reads, and every access when
 * CONFIG_PREEMPT_RT is disabled, go straight to proc_dointvec_minmax().
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	int before, ret;

	if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	before = *(int *)table->data;
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && before != *(int *)table->data)
		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
			     table->procname, current->comm,
			     task_pid_nr(current));

	return ret;
}
849 #endif
850 
851 /**
852  * proc_douintvec - read a vector of unsigned integers
853  * @table: the sysctl table
854  * @write: %TRUE if this is a write to the sysctl file
855  * @buffer: the user buffer
856  * @lenp: the size of the user buffer
857  * @ppos: file position
858  *
859  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
860  * values from/to the user buffer, treated as an ASCII string.
861  *
862  * Returns 0 on success.
863  */
proc_douintvec(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)864 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
865 		size_t *lenp, loff_t *ppos)
866 {
867 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
868 				 do_proc_douintvec_conv, NULL);
869 }
870 
871 /*
872  * Taint values can only be increased
873  * This means we can safely use a temporary.
874  */
proc_taint(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)875 static int proc_taint(struct ctl_table *table, int write,
876 			       void *buffer, size_t *lenp, loff_t *ppos)
877 {
878 	struct ctl_table t;
879 	unsigned long tmptaint = get_taint();
880 	int err;
881 
882 	if (write && !capable(CAP_SYS_ADMIN))
883 		return -EPERM;
884 
885 	t = *table;
886 	t.data = &tmptaint;
887 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
888 	if (err < 0)
889 		return err;
890 
891 	if (write) {
892 		int i;
893 
894 		/*
895 		 * If we are relying on panic_on_taint not producing
896 		 * false positives due to userspace input, bail out
897 		 * before setting the requested taint flags.
898 		 */
899 		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
900 			return -EINVAL;
901 
902 		/*
903 		 * Poor man's atomic or. Not worth adding a primitive
904 		 * to everyone's atomic.h for this
905 		 */
906 		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
907 			if ((1UL << i) & tmptaint)
908 				add_taint(i, LOCKDEP_STILL_OK);
909 	}
910 
911 	return err;
912 }
913 
914 #ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() restricted to privileged writers: a write without
 * CAP_SYS_ADMIN fails with -EPERM, reads are passed through unchanged.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	bool denied = write && !capable(CAP_SYS_ADMIN);

	if (denied)
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
923 #endif
924 
/**
 * struct do_proc_dointvec_minmax_conv_param - bounds for proc_dointvec_minmax()
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the limits against which values written through the
 * proc_dointvec_minmax() handler are range checked. A NULL pointer means
 * the corresponding bound is not checked.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};
938 
do_proc_dointvec_minmax_conv(bool * negp,unsigned long * lvalp,int * valp,int write,void * data)939 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
940 					int *valp,
941 					int write, void *data)
942 {
943 	int tmp, ret;
944 	struct do_proc_dointvec_minmax_conv_param *param = data;
945 	/*
946 	 * If writing, first do so via a temporary local int so we can
947 	 * bounds-check it before touching *valp.
948 	 */
949 	int *ip = write ? &tmp : valp;
950 
951 	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
952 	if (ret)
953 		return ret;
954 
955 	if (write) {
956 		if ((param->min && *param->min > tmp) ||
957 		    (param->max && *param->max < tmp))
958 			return -EINVAL;
959 		WRITE_ONCE(*valp, tmp);
960 	}
961 
962 	return 0;
963 }
964 
965 /**
966  * proc_dointvec_minmax - read a vector of integers with min/max values
967  * @table: the sysctl table
968  * @write: %TRUE if this is a write to the sysctl file
969  * @buffer: the user buffer
970  * @lenp: the size of the user buffer
971  * @ppos: file position
972  *
973  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
974  * values from/to the user buffer, treated as an ASCII string.
975  *
976  * This routine will ensure the values are within the range specified by
977  * table->extra1 (min) and table->extra2 (max).
978  *
979  * Returns 0 on success or -EINVAL on write when the range check fails.
980  */
proc_dointvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)981 int proc_dointvec_minmax(struct ctl_table *table, int write,
982 		  void *buffer, size_t *lenp, loff_t *ppos)
983 {
984 	struct do_proc_dointvec_minmax_conv_param param = {
985 		.min = (int *) table->extra1,
986 		.max = (int *) table->extra2,
987 	};
988 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
989 				do_proc_dointvec_minmax_conv, &param);
990 }
991 
/**
 * struct do_proc_douintvec_minmax_conv_param - bounds for proc_douintvec_minmax()
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the limits against which values written through the
 * proc_douintvec_minmax() handler are range checked. A NULL pointer means
 * the corresponding bound is not checked.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};
1005 
do_proc_douintvec_minmax_conv(unsigned long * lvalp,unsigned int * valp,int write,void * data)1006 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
1007 					 unsigned int *valp,
1008 					 int write, void *data)
1009 {
1010 	int ret;
1011 	unsigned int tmp;
1012 	struct do_proc_douintvec_minmax_conv_param *param = data;
1013 	/* write via temporary local uint for bounds-checking */
1014 	unsigned int *up = write ? &tmp : valp;
1015 
1016 	ret = do_proc_douintvec_conv(lvalp, up, write, data);
1017 	if (ret)
1018 		return ret;
1019 
1020 	if (write) {
1021 		if ((param->min && *param->min > tmp) ||
1022 		    (param->max && *param->max < tmp))
1023 			return -ERANGE;
1024 
1025 		WRITE_ONCE(*valp, tmp);
1026 	}
1027 
1028 	return 0;
1029 }
1030 
1031 /**
1032  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
1033  * @table: the sysctl table
1034  * @write: %TRUE if this is a write to the sysctl file
1035  * @buffer: the user buffer
1036  * @lenp: the size of the user buffer
1037  * @ppos: file position
1038  *
1039  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
1040  * values from/to the user buffer, treated as an ASCII string. Negative
1041  * strings are not allowed.
1042  *
1043  * This routine will ensure the values are within the range specified by
1044  * table->extra1 (min) and table->extra2 (max). There is a final sanity
1045  * check for UINT_MAX to avoid having to support wrap around uses from
1046  * userspace.
1047  *
1048  * Returns 0 on success or -ERANGE on write when the range check fails.
1049  */
proc_douintvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1050 int proc_douintvec_minmax(struct ctl_table *table, int write,
1051 			  void *buffer, size_t *lenp, loff_t *ppos)
1052 {
1053 	struct do_proc_douintvec_minmax_conv_param param = {
1054 		.min = (unsigned int *) table->extra1,
1055 		.max = (unsigned int *) table->extra2,
1056 	};
1057 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
1058 				 do_proc_douintvec_minmax_conv, &param);
1059 }
1060 
1061 /**
1062  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
1063  * @table: the sysctl table
1064  * @write: %TRUE if this is a write to the sysctl file
1065  * @buffer: the user buffer
1066  * @lenp: the size of the user buffer
1067  * @ppos: file position
1068  *
1069  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
1070  * values from/to the user buffer, treated as an ASCII string. Negative
1071  * strings are not allowed.
1072  *
1073  * This routine will ensure the values are within the range specified by
1074  * table->extra1 (min) and table->extra2 (max).
1075  *
1076  * Returns 0 on success or an error on write when the range check fails.
1077  */
proc_dou8vec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1078 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1079 			void *buffer, size_t *lenp, loff_t *ppos)
1080 {
1081 	struct ctl_table tmp;
1082 	unsigned int min = 0, max = 255U, val;
1083 	u8 *data = table->data;
1084 	struct do_proc_douintvec_minmax_conv_param param = {
1085 		.min = &min,
1086 		.max = &max,
1087 	};
1088 	int res;
1089 
1090 	/* Do not support arrays yet. */
1091 	if (table->maxlen != sizeof(u8))
1092 		return -EINVAL;
1093 
1094 	if (table->extra1) {
1095 		min = *(unsigned int *) table->extra1;
1096 		if (min > 255U)
1097 			return -EINVAL;
1098 	}
1099 	if (table->extra2) {
1100 		max = *(unsigned int *) table->extra2;
1101 		if (max > 255U)
1102 			return -EINVAL;
1103 	}
1104 
1105 	tmp = *table;
1106 
1107 	tmp.maxlen = sizeof(val);
1108 	tmp.data = &val;
1109 	val = READ_ONCE(*data);
1110 	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1111 				do_proc_douintvec_minmax_conv, &param);
1112 	if (res)
1113 		return res;
1114 	if (write)
1115 		WRITE_ONCE(*data, val);
1116 	return 0;
1117 }
1118 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1119 
do_proc_dopipe_max_size_conv(unsigned long * lvalp,unsigned int * valp,int write,void * data)1120 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
1121 					unsigned int *valp,
1122 					int write, void *data)
1123 {
1124 	if (write) {
1125 		unsigned int val;
1126 
1127 		val = round_pipe_size(*lvalp);
1128 		if (val == 0)
1129 			return -EINVAL;
1130 
1131 		*valp = val;
1132 	} else {
1133 		unsigned int val = *valp;
1134 		*lvalp = (unsigned long) val;
1135 	}
1136 
1137 	return 0;
1138 }
1139 
/*
 * sysctl handler that runs do_proc_douintvec() with
 * do_proc_dopipe_max_size_conv() as the per-value conversion callback.
 */
static int proc_dopipe_max_size(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int rc;

	rc = do_proc_douintvec(table, write, buffer, lenp, ppos,
			       do_proc_dopipe_max_size_conv, NULL);
	return rc;
}
1146 
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'. Compiles to an empty function without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;

	/* A leading '/' or '|' is accepted without complaint. */
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
1160 
/*
 * proc_dointvec_minmax() wrapper that re-runs validate_coredump_safety()
 * after every successful call.
 */
static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (rc)
		return rc;

	validate_coredump_safety();
	return 0;
}
1169 
1170 #ifdef CONFIG_COREDUMP
/*
 * proc_dostring() wrapper that re-runs validate_coredump_safety() after
 * every successful call.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	int rc = proc_dostring(table, write, buffer, lenp, ppos);

	if (rc)
		return rc;

	validate_coredump_safety();
	return 0;
}
1179 #endif
1180 
1181 #ifdef CONFIG_MAGIC_SYSRQ
/*
 * sysctl handler for the SysRq mask.
 *
 * The integer transfer operates on a local copy seeded from sysrq_mask().
 * After a successful write the new value is handed to
 * sysrq_toggle_support(); reads and failed calls leave it untouched.
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int mask = sysrq_mask();
	int err;

	err = __do_proc_dointvec(&mask, table, write, buffer,
				 lenp, ppos, NULL, NULL);
	if (err)
		return err;

	if (write)
		sysrq_toggle_support(mask);

	return 0;
}
1199 #endif
1200 
/*
 * Core of the unsigned long vector handlers.
 *
 * @data:    start of the unsigned long array to read from / write to
 * @table:   sysctl table; maxlen gives the array size in bytes, extra1 and
 *           extra2 optionally point to the minimum and maximum stored value
 * @write:   non-zero when user input is being parsed into @data
 * @buffer:  input text on write, output buffer on read
 * @lenp:    in: bytes available in @buffer; out: bytes consumed/produced
 * @ppos:    file position, advanced by the final *lenp
 * @convmul: user value is multiplied by this before being stored
 * @convdiv: user value is divided by this before being stored
 *
 * On write each parsed value is stored as convmul * val / convdiv; on read
 * each stored value is reported as convdiv * stored / convmul.
 *
 * Returns 0 on success, the error from proc_get_long(), or -EINVAL when a
 * value is out of range or a write did not complete a single loop iteration.
 */
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos,
		unsigned long convmul, unsigned long convdiv)
{
	unsigned long *i, *min, *max;
	int vleft, first = 1, err = 0;
	size_t left;
	char *p;

	/* Nothing to do: no storage, no room, or a read at a non-zero offset. */
	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (unsigned long *) data;
	min = (unsigned long *) table->extra1;
	max = (unsigned long *) table->extra2;
	/* Number of array slots still to be processed. */
	vleft = table->maxlen / sizeof(unsigned long);
	left = *lenp;

	if (write) {
		/* Skips parsing but still advances *ppos by the full *lenp. */
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		/* Only the first PAGE_SIZE - 1 bytes of input are parsed. */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = buffer;
	}

	/*
	 * 'first' is cleared in the increment expression, so it is reset by
	 * any iteration that finishes or hits 'continue', but not by 'break'.
	 */
	for (; left && vleft--; i++, first = 0) {
		unsigned long val;

		if (write) {
			bool neg;

			proc_skip_spaces(&p, &left);
			if (!left)
				break;

			err = proc_get_long(&p, &left, &val, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			/* Negative input is not stored; move on to the next slot. */
			if (neg)
				continue;
			/*
			 * NOTE(review): convmul * val is evaluated in unsigned
			 * long and could wrap for very large input before the
			 * division and range check - confirm whether callers
			 * rely on extra2 to prevent this.
			 */
			val = convmul * val / convdiv;
			if ((min && val < *min) || (max && val > *max)) {
				err = -EINVAL;
				break;
			}
			WRITE_ONCE(*i, val);
		} else {
			val = convdiv * READ_ONCE(*i) / convmul;
			/* Values after the first are separated by a tab. */
			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, val, false);
		}
	}

	/* Terminate read output with a newline if anything was emitted. */
	if (!write && !first && left && !err)
		proc_put_char(&buffer, &left, '\n');
	/* Consume whitespace trailing the last parsed value. */
	if (write && !err)
		proc_skip_spaces(&p, &left);
	/* A write that never completed an iteration is an error. */
	if (write && first)
		return err ? : -EINVAL;
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
1272 
/*
 * Convenience wrapper: run __do_proc_doulongvec_minmax() on the array
 * referenced by table->data.
 */
static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
		unsigned long convdiv)
{
	void *values = table->data;

	return __do_proc_doulongvec_minmax(values, table, write, buffer,
					   lenp, ppos, convmul, convdiv);
}
1280 
1281 /**
1282  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1283  * @table: the sysctl table
1284  * @write: %TRUE if this is a write to the sysctl file
1285  * @buffer: the user buffer
1286  * @lenp: the size of the user buffer
1287  * @ppos: file position
1288  *
1289  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1290  * values from/to the user buffer, treated as an ASCII string.
1291  *
1292  * This routine will ensure the values are within the range specified by
1293  * table->extra1 (min) and table->extra2 (max).
1294  *
1295  * Returns 0 on success.
1296  */
proc_doulongvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1297 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1298 			   void *buffer, size_t *lenp, loff_t *ppos)
1299 {
1300     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1301 }
1302 
1303 /**
1304  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1305  * @table: the sysctl table
1306  * @write: %TRUE if this is a write to the sysctl file
1307  * @buffer: the user buffer
1308  * @lenp: the size of the user buffer
1309  * @ppos: file position
1310  *
1311  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1312  * values from/to the user buffer, treated as an ASCII string. The values
1313  * are treated as milliseconds, and converted to jiffies when they are stored.
1314  *
1315  * This routine will ensure the values are within the range specified by
1316  * table->extra1 (min) and table->extra2 (max).
1317  *
1318  * Returns 0 on success.
1319  */
proc_doulongvec_ms_jiffies_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1320 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1321 				      void *buffer, size_t *lenp, loff_t *ppos)
1322 {
1323     return do_proc_doulongvec_minmax(table, write, buffer,
1324 				     lenp, ppos, HZ, 1000l);
1325 }
1326 
1327 
/*
 * Conversion callback between user-visible seconds and stored jiffies.
 *
 * On write, @negp and @lvalp carry the sign and magnitude in seconds.
 * Magnitudes above INT_MAX / HZ are rejected with a non-zero return;
 * otherwise the value is scaled by HZ and stored in @valp. On read, the
 * stored int is split into sign and magnitude and divided by HZ.
 */
static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
					 int *valp,
					 int write, void *data)
{
	unsigned long magnitude;

	if (write) {
		unsigned long jif;

		magnitude = *lvalp;
		if (magnitude > INT_MAX / HZ)
			return 1;

		jif = magnitude * HZ;
		if (*negp)
			jif = -jif;
		WRITE_ONCE(*valp, jif);
	} else {
		int stored = READ_ONCE(*valp);
		bool negative = stored < 0;

		*negp = negative;
		magnitude = negative ? -(unsigned long)stored
				     : (unsigned long)stored;
		*lvalp = magnitude / HZ;
	}

	return 0;
}
1353 
/*
 * Conversion callback between user-visible 1/USER_HZ ticks and stored
 * jiffies.
 *
 * On write, @negp and @lvalp carry the sign and magnitude in clock ticks.
 * When USER_HZ < HZ, magnitudes above (LONG_MAX / HZ) * USER_HZ are
 * rejected with a non-zero return; otherwise the signed value is converted
 * with clock_t_to_jiffies() and stored in @valp. On read, the stored int
 * is split into sign and magnitude and converted with jiffies_to_clock_t().
 *
 * The sysctl variable is accessed with READ_ONCE()/WRITE_ONCE(), matching
 * the seconds and milliseconds converters in this file.
 */
static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
						int *valp,
						int write, void *data)
{
	if (write) {
		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
			return 1;
		WRITE_ONCE(*valp,
			   clock_t_to_jiffies(*negp ? -*lvalp : *lvalp));
	} else {
		int val = READ_ONCE(*valp);
		unsigned long lval;
		if (val < 0) {
			*negp = true;
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_clock_t(lval);
	}
	return 0;
}
1376 
/*
 * Conversion callback between user-visible milliseconds and stored jiffies.
 *
 * On write, the signed user value is passed to msecs_to_jiffies(); results
 * above INT_MAX are rejected with a non-zero return, otherwise the result
 * is stored in @valp. On read, the stored int is split into sign and
 * magnitude and converted with jiffies_to_msecs().
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long msecs = *lvalp;
		unsigned long jif;

		if (*negp)
			msecs = -msecs;

		jif = msecs_to_jiffies(msecs);
		if (jif > INT_MAX)
			return 1;
		WRITE_ONCE(*valp, (int)jif);
	} else {
		int stored = READ_ONCE(*valp);
		bool negative = stored < 0;
		unsigned long magnitude;

		*negp = negative;
		magnitude = negative ? -(unsigned long)stored
				     : (unsigned long)stored;
		*lvalp = jiffies_to_msecs(magnitude);
	}

	return 0;
}
1401 
1402 /**
1403  * proc_dointvec_jiffies - read a vector of integers as seconds
1404  * @table: the sysctl table
1405  * @write: %TRUE if this is a write to the sysctl file
1406  * @buffer: the user buffer
1407  * @lenp: the size of the user buffer
1408  * @ppos: file position
1409  *
1410  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1411  * values from/to the user buffer, treated as an ASCII string.
1412  * The values read are assumed to be in seconds, and are converted into
1413  * jiffies.
1414  *
1415  * Returns 0 on success.
1416  */
proc_dointvec_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1417 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1418 			  void *buffer, size_t *lenp, loff_t *ppos)
1419 {
1420     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1421 		    	    do_proc_dointvec_jiffies_conv,NULL);
1422 }
1423 
1424 /**
1425  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1426  * @table: the sysctl table
1427  * @write: %TRUE if this is a write to the sysctl file
1428  * @buffer: the user buffer
1429  * @lenp: the size of the user buffer
1430  * @ppos: pointer to the file position
1431  *
1432  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1433  * values from/to the user buffer, treated as an ASCII string.
1434  * The values read are assumed to be in 1/USER_HZ seconds, and
1435  * are converted into jiffies.
1436  *
1437  * Returns 0 on success.
1438  */
proc_dointvec_userhz_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1439 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1440 				 void *buffer, size_t *lenp, loff_t *ppos)
1441 {
1442     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1443 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
1444 }
1445 
1446 /**
1447  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1448  * @table: the sysctl table
1449  * @write: %TRUE if this is a write to the sysctl file
1450  * @buffer: the user buffer
1451  * @lenp: the size of the user buffer
1452  * @ppos: file position
1453  * @ppos: the current position in the file
1454  *
1455  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1456  * values from/to the user buffer, treated as an ASCII string.
1457  * The values read are assumed to be in 1/1000 seconds, and
1458  * are converted into jiffies.
1459  *
1460  * Returns 0 on success.
1461  */
proc_dointvec_ms_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1462 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1463 		size_t *lenp, loff_t *ppos)
1464 {
1465 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1466 				do_proc_dointvec_ms_jiffies_conv, NULL);
1467 }
1468 
/*
 * sysctl handler for cad_pid.
 *
 * The integer transfer operates on a local pid_t seeded from
 * pid_vnr(cad_pid). After a successful write the number is looked up with
 * find_get_pid(); if found, it is swapped into cad_pid and the previous
 * struct pid is handed to put_pid().
 *
 * Returns 0 on success, the error from __do_proc_dointvec(), or -ESRCH
 * when find_get_pid() returns NULL for the written value.
 */
static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	struct pid *replacement, *previous;
	pid_t nr = pid_vnr(cad_pid);
	int err;

	err = __do_proc_dointvec(&nr, table, write, buffer,
				 lenp, ppos, NULL, NULL);
	if (err)
		return err;
	if (!write)
		return 0;

	replacement = find_get_pid(nr);
	if (!replacement)
		return -ESRCH;

	previous = xchg(&cad_pid, replacement);
	put_pid(previous);
	return 0;
}
1490 
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * table->data holds a pointer to the bitmap pointer, and the bitmap
 * length (in bits) is in table->maxlen.
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. A write at file
 * position zero replaces the bitmap with the given input; a write at a
 * non-zero position ORs the input into the existing bitmap.
 *
 * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be
 * allocated, -EINVAL for a negative or out-of-range bit number or a
 * descending range, or the error from proc_get_long().
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	bool first = 1;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/*
	 * tr_a: characters accepted after the first number of an entry,
	 * tr_b: characters accepted after the end of a range.
	 */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	/* Nothing to do: no bitmap, no room, or a read at a non-zero offset. */
	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *p = buffer;
		size_t skipped = 0;

		/* Only the first PAGE_SIZE - 1 bytes are parsed in one pass. */
		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		/* Parse into a scratch bitmap; it is committed only on success. */
		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap)
			return -ENOMEM;
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					     sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* A lone number is the single-bit range val_a-val_a. */
			val_b = val_a;
			/* Step over the terminator that ended the number. */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						     &neg, tr_b, sizeof(tr_b),
						     &c);
				/*
				 * If we consumed all of a truncated buffer,
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				/* Step over the terminator that ended the range. */
				if (left) {
					p++;
					left--;
				}
			}

			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			first = 0;
			proc_skip_char(&p, &left, '\n');
		}
		/* Bytes beyond the parsed window count as not consumed. */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;

		/* Emit each run of set bits as "a" or "a-b", comma separated. */
		while (left) {
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			/* bit_b is the last set bit of the run starting at bit_a. */
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first)
				proc_put_char(&buffer, &left, ',');
			proc_put_long(&buffer, &left, bit_a, false);
			if (bit_a != bit_b) {
				proc_put_char(&buffer, &left, '-');
				proc_put_long(&buffer, &left, bit_b, false);
			}

			/* Resume the search just past this run. */
			first = 0; bit_b++;
		}
		proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/* Continuation writes accumulate; a fresh write replaces. */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		*lenp -= left;
		*ppos += *lenp;
	}

	bitmap_free(tmp_bitmap);
	return err;
}
1638 
1639 #else /* CONFIG_PROC_SYSCTL */
1640 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dostring(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1646 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1652 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_douintvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1658 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1664 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_douintvec_minmax(struct ctl_table *table, int write,
			  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1670 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dou8vec_minmax(struct ctl_table *table, int write,
			void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1676 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1682 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1688 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1694 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1700 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1706 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1712 
1713 #endif /* CONFIG_PROC_SYSCTL */
1714 
1715 #if defined(CONFIG_SYSCTL)
/*
 * sysctl handler exposing a static key (table->data) as a 0/1 integer.
 *
 * The integer transfer runs through proc_dointvec_minmax() on a temporary
 * table whose data is a local int seeded from static_key_enabled() and
 * whose range is SYSCTL_ZERO..SYSCTL_ONE. After a successful write the key
 * is enabled or disabled to match the written value. Writes require
 * CAP_SYS_ADMIN; the whole sequence is serialized by a function-local
 * mutex.
 *
 * Returns 0 on success, -EPERM for a write without CAP_SYS_ADMIN, or the
 * error from proc_dointvec_minmax().
 */
int proc_do_static_key(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(static_key_mutex);
	struct static_key *key = table->data;
	int enabled, err;
	struct ctl_table shadow = {
		.data   = &enabled,
		.maxlen = sizeof(enabled),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&static_key_mutex);

	enabled = static_key_enabled(key);
	err = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
	if (err || !write)
		goto unlock;

	/* Apply the freshly written state to the key. */
	if (enabled)
		static_key_enable(key);
	else
		static_key_disable(key);

unlock:
	mutex_unlock(&static_key_mutex);
	return err;
}
1745 
1746 static struct ctl_table kern_table[] = {
1747 	{
1748 		.procname	= "sched_child_runs_first",
1749 		.data		= &sysctl_sched_child_runs_first,
1750 		.maxlen		= sizeof(unsigned int),
1751 		.mode		= 0644,
1752 		.proc_handler	= proc_dointvec,
1753 	},
1754 #ifdef CONFIG_SCHED_DEBUG
1755 	{
1756 		.procname	= "sched_min_granularity_ns",
1757 		.data		= &sysctl_sched_min_granularity,
1758 		.maxlen		= sizeof(unsigned int),
1759 		.mode		= 0644,
1760 		.proc_handler	= sched_proc_update_handler,
1761 		.extra1		= &min_sched_granularity_ns,
1762 		.extra2		= &max_sched_granularity_ns,
1763 	},
1764 	{
1765 		.procname	= "sched_latency_ns",
1766 		.data		= &sysctl_sched_latency,
1767 		.maxlen		= sizeof(unsigned int),
1768 		.mode		= 0644,
1769 		.proc_handler	= sched_proc_update_handler,
1770 		.extra1		= &min_sched_granularity_ns,
1771 		.extra2		= &max_sched_granularity_ns,
1772 	},
1773 	{
1774 		.procname	= "sched_wakeup_granularity_ns",
1775 		.data		= &sysctl_sched_wakeup_granularity,
1776 		.maxlen		= sizeof(unsigned int),
1777 		.mode		= 0644,
1778 		.proc_handler	= sched_proc_update_handler,
1779 		.extra1		= &min_wakeup_granularity_ns,
1780 		.extra2		= &max_wakeup_granularity_ns,
1781 	},
1782 #ifdef CONFIG_SMP
1783 	{
1784 		.procname	= "sched_tunable_scaling",
1785 		.data		= &sysctl_sched_tunable_scaling,
1786 		.maxlen		= sizeof(enum sched_tunable_scaling),
1787 		.mode		= 0644,
1788 		.proc_handler	= sched_proc_update_handler,
1789 		.extra1		= &min_sched_tunable_scaling,
1790 		.extra2		= &max_sched_tunable_scaling,
1791 	},
1792 	{
1793 		.procname	= "sched_migration_cost_ns",
1794 		.data		= &sysctl_sched_migration_cost,
1795 		.maxlen		= sizeof(unsigned int),
1796 		.mode		= 0644,
1797 		.proc_handler	= proc_dointvec,
1798 	},
1799 	{
1800 		.procname	= "sched_nr_migrate",
1801 		.data		= &sysctl_sched_nr_migrate,
1802 		.maxlen		= sizeof(unsigned int),
1803 		.mode		= 0644,
1804 		.proc_handler	= proc_dointvec,
1805 	},
1806 #ifdef CONFIG_SCHEDSTATS
1807 	{
1808 		.procname	= "sched_schedstats",
1809 		.data		= NULL,
1810 		.maxlen		= sizeof(unsigned int),
1811 		.mode		= 0644,
1812 		.proc_handler	= sysctl_schedstats,
1813 		.extra1		= SYSCTL_ZERO,
1814 		.extra2		= SYSCTL_ONE,
1815 	},
1816 #endif /* CONFIG_SCHEDSTATS */
1817 #endif /* CONFIG_SMP */
1818 #ifdef CONFIG_NUMA_BALANCING
1819 	{
1820 		.procname	= "numa_balancing_scan_delay_ms",
1821 		.data		= &sysctl_numa_balancing_scan_delay,
1822 		.maxlen		= sizeof(unsigned int),
1823 		.mode		= 0644,
1824 		.proc_handler	= proc_dointvec,
1825 	},
1826 	{
1827 		.procname	= "numa_balancing_scan_period_min_ms",
1828 		.data		= &sysctl_numa_balancing_scan_period_min,
1829 		.maxlen		= sizeof(unsigned int),
1830 		.mode		= 0644,
1831 		.proc_handler	= proc_dointvec,
1832 	},
1833 	{
1834 		.procname	= "numa_balancing_scan_period_max_ms",
1835 		.data		= &sysctl_numa_balancing_scan_period_max,
1836 		.maxlen		= sizeof(unsigned int),
1837 		.mode		= 0644,
1838 		.proc_handler	= proc_dointvec,
1839 	},
1840 	{
1841 		.procname	= "numa_balancing_scan_size_mb",
1842 		.data		= &sysctl_numa_balancing_scan_size,
1843 		.maxlen		= sizeof(unsigned int),
1844 		.mode		= 0644,
1845 		.proc_handler	= proc_dointvec_minmax,
1846 		.extra1		= SYSCTL_ONE,
1847 	},
1848 	{
1849 		.procname	= "numa_balancing",
1850 		.data		= NULL, /* filled in by handler */
1851 		.maxlen		= sizeof(unsigned int),
1852 		.mode		= 0644,
1853 		.proc_handler	= sysctl_numa_balancing,
1854 		.extra1		= SYSCTL_ZERO,
1855 		.extra2		= SYSCTL_ONE,
1856 	},
1857 #endif /* CONFIG_NUMA_BALANCING */
1858 #endif /* CONFIG_SCHED_DEBUG */
1859 	{
1860 		.procname	= "sched_rt_period_us",
1861 		.data		= &sysctl_sched_rt_period,
1862 		.maxlen		= sizeof(unsigned int),
1863 		.mode		= 0644,
1864 		.proc_handler	= sched_rt_handler,
1865 		.extra1		= SYSCTL_ONE,
1866 		.extra2		= SYSCTL_INT_MAX,
1867 	},
1868 	{
1869 		.procname	= "sched_rt_runtime_us",
1870 		.data		= &sysctl_sched_rt_runtime,
1871 		.maxlen		= sizeof(int),
1872 		.mode		= 0644,
1873 		.proc_handler	= sched_rt_handler,
1874 		.extra1		= SYSCTL_NEG_ONE,
1875 		.extra2		= SYSCTL_INT_MAX,
1876 	},
1877 	{
1878 		.procname	= "sched_deadline_period_max_us",
1879 		.data		= &sysctl_sched_dl_period_max,
1880 		.maxlen		= sizeof(unsigned int),
1881 		.mode		= 0644,
1882 		.proc_handler	= proc_dointvec,
1883 	},
1884 	{
1885 		.procname	= "sched_deadline_period_min_us",
1886 		.data		= &sysctl_sched_dl_period_min,
1887 		.maxlen		= sizeof(unsigned int),
1888 		.mode		= 0644,
1889 		.proc_handler	= proc_dointvec,
1890 	},
1891 	{
1892 		.procname	= "sched_rr_timeslice_ms",
1893 		.data		= &sysctl_sched_rr_timeslice,
1894 		.maxlen		= sizeof(int),
1895 		.mode		= 0644,
1896 		.proc_handler	= sched_rr_handler,
1897 	},
1898 #ifdef CONFIG_SMP
1899 	{
1900 		.procname	= "sched_pelt_multiplier",
1901 		.data		= &sysctl_sched_pelt_multiplier,
1902 		.maxlen		= sizeof(unsigned int),
1903 		.mode		= 0644,
1904 		.proc_handler	= sched_pelt_multiplier,
1905 	},
1906 #endif
1907 #ifdef CONFIG_UCLAMP_TASK
1908 	{
1909 		.procname	= "sched_util_clamp_min",
1910 		.data		= &sysctl_sched_uclamp_util_min,
1911 		.maxlen		= sizeof(unsigned int),
1912 		.mode		= 0644,
1913 		.proc_handler	= sysctl_sched_uclamp_handler,
1914 	},
1915 	{
1916 		.procname	= "sched_util_clamp_max",
1917 		.data		= &sysctl_sched_uclamp_util_max,
1918 		.maxlen		= sizeof(unsigned int),
1919 		.mode		= 0644,
1920 		.proc_handler	= sysctl_sched_uclamp_handler,
1921 	},
1922 	{
1923 		.procname	= "sched_util_clamp_min_rt_default",
1924 		.data		= &sysctl_sched_uclamp_util_min_rt_default,
1925 		.maxlen		= sizeof(unsigned int),
1926 		.mode		= 0644,
1927 		.proc_handler	= sysctl_sched_uclamp_handler,
1928 	},
1929 #endif
1930 #ifdef CONFIG_SCHED_AUTOGROUP
1931 	{
1932 		.procname	= "sched_autogroup_enabled",
1933 		.data		= &sysctl_sched_autogroup_enabled,
1934 		.maxlen		= sizeof(unsigned int),
1935 		.mode		= 0644,
1936 		.proc_handler	= proc_dointvec_minmax,
1937 		.extra1		= SYSCTL_ZERO,
1938 		.extra2		= SYSCTL_ONE,
1939 	},
1940 #endif
1941 #ifdef CONFIG_CFS_BANDWIDTH
1942 	{
1943 		.procname	= "sched_cfs_bandwidth_slice_us",
1944 		.data		= &sysctl_sched_cfs_bandwidth_slice,
1945 		.maxlen		= sizeof(unsigned int),
1946 		.mode		= 0644,
1947 		.proc_handler	= proc_dointvec_minmax,
1948 		.extra1		= SYSCTL_ONE,
1949 	},
1950 #endif
1951 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
1952 	{
1953 		.procname	= "sched_energy_aware",
1954 		.data		= &sysctl_sched_energy_aware,
1955 		.maxlen		= sizeof(unsigned int),
1956 		.mode		= 0644,
1957 		.proc_handler	= sched_energy_aware_handler,
1958 		.extra1		= SYSCTL_ZERO,
1959 		.extra2		= SYSCTL_ONE,
1960 	},
1961 #endif
1962 #ifdef CONFIG_PROVE_LOCKING
1963 	{
1964 		.procname	= "prove_locking",
1965 		.data		= &prove_locking,
1966 		.maxlen		= sizeof(int),
1967 		.mode		= 0644,
1968 		.proc_handler	= proc_dointvec,
1969 	},
1970 #endif
1971 #ifdef CONFIG_LOCK_STAT
1972 	{
1973 		.procname	= "lock_stat",
1974 		.data		= &lock_stat,
1975 		.maxlen		= sizeof(int),
1976 		.mode		= 0644,
1977 		.proc_handler	= proc_dointvec,
1978 	},
1979 #endif
1980 	{
1981 		.procname	= "panic",
1982 		.data		= &panic_timeout,
1983 		.maxlen		= sizeof(int),
1984 		.mode		= 0644,
1985 		.proc_handler	= proc_dointvec,
1986 	},
1987 #ifdef CONFIG_COREDUMP
1988 	{
1989 		.procname	= "core_uses_pid",
1990 		.data		= &core_uses_pid,
1991 		.maxlen		= sizeof(int),
1992 		.mode		= 0644,
1993 		.proc_handler	= proc_dointvec,
1994 	},
1995 	{
1996 		.procname	= "core_pattern",
1997 		.data		= core_pattern,
1998 		.maxlen		= CORENAME_MAX_SIZE,
1999 		.mode		= 0644,
2000 		.proc_handler	= proc_dostring_coredump,
2001 	},
2002 	{
2003 		.procname	= "core_pipe_limit",
2004 		.data		= &core_pipe_limit,
2005 		.maxlen		= sizeof(unsigned int),
2006 		.mode		= 0644,
2007 		.proc_handler	= proc_dointvec,
2008 	},
2009 #endif
2010 #ifdef CONFIG_PROC_SYSCTL
2011 	{
2012 		.procname	= "tainted",
2013 		.maxlen 	= sizeof(long),
2014 		.mode		= 0644,
2015 		.proc_handler	= proc_taint,
2016 	},
2017 	{
2018 		.procname	= "sysctl_writes_strict",
2019 		.data		= &sysctl_writes_strict,
2020 		.maxlen		= sizeof(int),
2021 		.mode		= 0644,
2022 		.proc_handler	= proc_dointvec_minmax,
2023 		.extra1		= SYSCTL_NEG_ONE,
2024 		.extra2		= SYSCTL_ONE,
2025 	},
2026 #endif
2027 #ifdef CONFIG_LATENCYTOP
2028 	{
2029 		.procname	= "latencytop",
2030 		.data		= &latencytop_enabled,
2031 		.maxlen		= sizeof(int),
2032 		.mode		= 0644,
2033 		.proc_handler	= sysctl_latencytop,
2034 	},
2035 #endif
2036 #ifdef CONFIG_BLK_DEV_INITRD
2037 	{
2038 		.procname	= "real-root-dev",
2039 		.data		= &real_root_dev,
2040 		.maxlen		= sizeof(int),
2041 		.mode		= 0644,
2042 		.proc_handler	= proc_dointvec,
2043 	},
2044 #endif
2045 	{
2046 		.procname	= "print-fatal-signals",
2047 		.data		= &print_fatal_signals,
2048 		.maxlen		= sizeof(int),
2049 		.mode		= 0644,
2050 		.proc_handler	= proc_dointvec,
2051 	},
2052 #ifdef CONFIG_SPARC
2053 	{
2054 		.procname	= "reboot-cmd",
2055 		.data		= reboot_command,
2056 		.maxlen		= 256,
2057 		.mode		= 0644,
2058 		.proc_handler	= proc_dostring,
2059 	},
2060 	{
2061 		.procname	= "stop-a",
2062 		.data		= &stop_a_enabled,
2063 		.maxlen		= sizeof (int),
2064 		.mode		= 0644,
2065 		.proc_handler	= proc_dointvec,
2066 	},
2067 	{
2068 		.procname	= "scons-poweroff",
2069 		.data		= &scons_pwroff,
2070 		.maxlen		= sizeof (int),
2071 		.mode		= 0644,
2072 		.proc_handler	= proc_dointvec,
2073 	},
2074 #endif
2075 #ifdef CONFIG_SPARC64
2076 	{
2077 		.procname	= "tsb-ratio",
2078 		.data		= &sysctl_tsb_ratio,
2079 		.maxlen		= sizeof (int),
2080 		.mode		= 0644,
2081 		.proc_handler	= proc_dointvec,
2082 	},
2083 #endif
2084 #ifdef CONFIG_PARISC
2085 	{
2086 		.procname	= "soft-power",
2087 		.data		= &pwrsw_enabled,
2088 		.maxlen		= sizeof (int),
2089 		.mode		= 0644,
2090 		.proc_handler	= proc_dointvec,
2091 	},
2092 #endif
2093 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
2094 	{
2095 		.procname	= "unaligned-trap",
2096 		.data		= &unaligned_enabled,
2097 		.maxlen		= sizeof (int),
2098 		.mode		= 0644,
2099 		.proc_handler	= proc_dointvec,
2100 	},
2101 #endif
2102 	{
2103 		.procname	= "ctrl-alt-del",
2104 		.data		= &C_A_D,
2105 		.maxlen		= sizeof(int),
2106 		.mode		= 0644,
2107 		.proc_handler	= proc_dointvec,
2108 	},
2109 #ifdef CONFIG_FUNCTION_TRACER
2110 	{
2111 		.procname	= "ftrace_enabled",
2112 		.data		= &ftrace_enabled,
2113 		.maxlen		= sizeof(int),
2114 		.mode		= 0644,
2115 		.proc_handler	= ftrace_enable_sysctl,
2116 	},
2117 #endif
2118 #ifdef CONFIG_STACK_TRACER
2119 	{
2120 		.procname	= "stack_tracer_enabled",
2121 		.data		= &stack_tracer_enabled,
2122 		.maxlen		= sizeof(int),
2123 		.mode		= 0644,
2124 		.proc_handler	= stack_trace_sysctl,
2125 	},
2126 #endif
2127 #ifdef CONFIG_TRACING
2128 	{
2129 		.procname	= "ftrace_dump_on_oops",
2130 		.data		= &ftrace_dump_on_oops,
2131 		.maxlen		= sizeof(int),
2132 		.mode		= 0644,
2133 		.proc_handler	= proc_dointvec,
2134 	},
2135 	{
2136 		.procname	= "traceoff_on_warning",
2137 		.data		= &__disable_trace_on_warning,
2138 		.maxlen		= sizeof(__disable_trace_on_warning),
2139 		.mode		= 0644,
2140 		.proc_handler	= proc_dointvec,
2141 	},
2142 	{
2143 		.procname	= "tracepoint_printk",
2144 		.data		= &tracepoint_printk,
2145 		.maxlen		= sizeof(tracepoint_printk),
2146 		.mode		= 0644,
2147 		.proc_handler	= tracepoint_printk_sysctl,
2148 	},
2149 #endif
2150 #ifdef CONFIG_KEXEC_CORE
2151 	{
2152 		.procname	= "kexec_load_disabled",
2153 		.data		= &kexec_load_disabled,
2154 		.maxlen		= sizeof(int),
2155 		.mode		= 0644,
2156 		/* only handle a transition from default "0" to "1" */
2157 		.proc_handler	= proc_dointvec_minmax,
2158 		.extra1		= SYSCTL_ONE,
2159 		.extra2		= SYSCTL_ONE,
2160 	},
2161 #endif
2162 #ifdef CONFIG_MODULES
2163 	{
2164 		.procname	= "modprobe",
2165 		.data		= &modprobe_path,
2166 		.maxlen		= KMOD_PATH_LEN,
2167 		.mode		= 0644,
2168 		.proc_handler	= proc_dostring,
2169 	},
2170 	{
2171 		.procname	= "modules_disabled",
2172 		.data		= &modules_disabled,
2173 		.maxlen		= sizeof(int),
2174 		.mode		= 0644,
2175 		/* only handle a transition from default "0" to "1" */
2176 		.proc_handler	= proc_dointvec_minmax,
2177 		.extra1		= SYSCTL_ONE,
2178 		.extra2		= SYSCTL_ONE,
2179 	},
2180 #endif
2181 #ifdef CONFIG_UEVENT_HELPER
2182 	{
2183 		.procname	= "hotplug",
2184 		.data		= &uevent_helper,
2185 		.maxlen		= UEVENT_HELPER_PATH_LEN,
2186 		.mode		= 0644,
2187 		.proc_handler	= proc_dostring,
2188 	},
2189 #endif
2190 #ifdef CONFIG_CHR_DEV_SG
2191 	{
2192 		.procname	= "sg-big-buff",
2193 		.data		= &sg_big_buff,
2194 		.maxlen		= sizeof (int),
2195 		.mode		= 0444,
2196 		.proc_handler	= proc_dointvec,
2197 	},
2198 #endif
2199 #ifdef CONFIG_BSD_PROCESS_ACCT
2200 	{
2201 		.procname	= "acct",
2202 		.data		= &acct_parm,
2203 		.maxlen		= 3*sizeof(int),
2204 		.mode		= 0644,
2205 		.proc_handler	= proc_dointvec,
2206 	},
2207 #endif
2208 #ifdef CONFIG_MAGIC_SYSRQ
2209 	{
2210 		.procname	= "sysrq",
2211 		.data		= NULL,
2212 		.maxlen		= sizeof (int),
2213 		.mode		= 0644,
2214 		.proc_handler	= sysrq_sysctl_handler,
2215 	},
2216 #endif
2217 #ifdef CONFIG_PROC_SYSCTL
2218 	{
2219 		.procname	= "cad_pid",
2220 		.data		= NULL,
2221 		.maxlen		= sizeof (int),
2222 		.mode		= 0600,
2223 		.proc_handler	= proc_do_cad_pid,
2224 	},
2225 #endif
2226 	{
2227 		.procname	= "threads-max",
2228 		.data		= NULL,
2229 		.maxlen		= sizeof(int),
2230 		.mode		= 0644,
2231 		.proc_handler	= sysctl_max_threads,
2232 	},
2233 	{
2234 		.procname	= "random",
2235 		.mode		= 0555,
2236 		.child		= random_table,
2237 	},
2238 	{
2239 		.procname	= "usermodehelper",
2240 		.mode		= 0555,
2241 		.child		= usermodehelper_table,
2242 	},
2243 #ifdef CONFIG_FW_LOADER_USER_HELPER
2244 	{
2245 		.procname	= "firmware_config",
2246 		.mode		= 0555,
2247 		.child		= firmware_config_table,
2248 	},
2249 #endif
2250 	{
2251 		.procname	= "overflowuid",
2252 		.data		= &overflowuid,
2253 		.maxlen		= sizeof(int),
2254 		.mode		= 0644,
2255 		.proc_handler	= proc_dointvec_minmax,
2256 		.extra1		= &minolduid,
2257 		.extra2		= &maxolduid,
2258 	},
2259 	{
2260 		.procname	= "overflowgid",
2261 		.data		= &overflowgid,
2262 		.maxlen		= sizeof(int),
2263 		.mode		= 0644,
2264 		.proc_handler	= proc_dointvec_minmax,
2265 		.extra1		= &minolduid,
2266 		.extra2		= &maxolduid,
2267 	},
2268 #ifdef CONFIG_S390
2269 	{
2270 		.procname	= "userprocess_debug",
2271 		.data		= &show_unhandled_signals,
2272 		.maxlen		= sizeof(int),
2273 		.mode		= 0644,
2274 		.proc_handler	= proc_dointvec,
2275 	},
2276 #endif
2277 	{
2278 		.procname	= "pid_max",
2279 		.data		= &pid_max,
2280 		.maxlen		= sizeof (int),
2281 		.mode		= 0644,
2282 		.proc_handler	= proc_dointvec_minmax,
2283 		.extra1		= &pid_max_min,
2284 		.extra2		= &pid_max_max,
2285 	},
2286 	{
2287 		.procname	= "panic_on_oops",
2288 		.data		= &panic_on_oops,
2289 		.maxlen		= sizeof(int),
2290 		.mode		= 0644,
2291 		.proc_handler	= proc_dointvec,
2292 	},
2293 	{
2294 		.procname	= "panic_print",
2295 		.data		= &panic_print,
2296 		.maxlen		= sizeof(unsigned long),
2297 		.mode		= 0644,
2298 		.proc_handler	= proc_doulongvec_minmax,
2299 	},
2300 #if defined CONFIG_PRINTK
2301 	{
2302 		.procname	= "printk",
2303 		.data		= &console_loglevel,
2304 		.maxlen		= 4*sizeof(int),
2305 		.mode		= 0644,
2306 		.proc_handler	= proc_dointvec,
2307 	},
2308 	{
2309 		.procname	= "printk_ratelimit",
2310 		.data		= &printk_ratelimit_state.interval,
2311 		.maxlen		= sizeof(int),
2312 		.mode		= 0644,
2313 		.proc_handler	= proc_dointvec_jiffies,
2314 	},
2315 	{
2316 		.procname	= "printk_ratelimit_burst",
2317 		.data		= &printk_ratelimit_state.burst,
2318 		.maxlen		= sizeof(int),
2319 		.mode		= 0644,
2320 		.proc_handler	= proc_dointvec,
2321 	},
2322 	{
2323 		.procname	= "printk_delay",
2324 		.data		= &printk_delay_msec,
2325 		.maxlen		= sizeof(int),
2326 		.mode		= 0644,
2327 		.proc_handler	= proc_dointvec_minmax,
2328 		.extra1		= SYSCTL_ZERO,
2329 		.extra2		= &ten_thousand,
2330 	},
2331 	{
2332 		.procname	= "printk_devkmsg",
2333 		.data		= devkmsg_log_str,
2334 		.maxlen		= DEVKMSG_STR_MAX_SIZE,
2335 		.mode		= 0644,
2336 		.proc_handler	= devkmsg_sysctl_set_loglvl,
2337 	},
2338 	{
2339 		.procname	= "dmesg_restrict",
2340 		.data		= &dmesg_restrict,
2341 		.maxlen		= sizeof(int),
2342 		.mode		= 0644,
2343 		.proc_handler	= proc_dointvec_minmax_sysadmin,
2344 		.extra1		= SYSCTL_ZERO,
2345 		.extra2		= SYSCTL_ONE,
2346 	},
2347 	{
2348 		.procname	= "kptr_restrict",
2349 		.data		= &kptr_restrict,
2350 		.maxlen		= sizeof(int),
2351 		.mode		= 0644,
2352 		.proc_handler	= proc_dointvec_minmax_sysadmin,
2353 		.extra1		= SYSCTL_ZERO,
2354 		.extra2		= SYSCTL_TWO,
2355 	},
2356 #endif
2357 	{
2358 		.procname	= "ngroups_max",
2359 		.data		= &ngroups_max,
2360 		.maxlen		= sizeof (int),
2361 		.mode		= 0444,
2362 		.proc_handler	= proc_dointvec,
2363 	},
2364 	{
2365 		.procname	= "cap_last_cap",
2366 		.data		= (void *)&cap_last_cap,
2367 		.maxlen		= sizeof(int),
2368 		.mode		= 0444,
2369 		.proc_handler	= proc_dointvec,
2370 	},
2371 #if defined(CONFIG_LOCKUP_DETECTOR)
2372 	{
2373 		.procname       = "watchdog",
2374 		.data		= &watchdog_user_enabled,
2375 		.maxlen		= sizeof(int),
2376 		.mode		= 0644,
2377 		.proc_handler   = proc_watchdog,
2378 		.extra1		= SYSCTL_ZERO,
2379 		.extra2		= SYSCTL_ONE,
2380 	},
2381 	{
2382 		.procname	= "watchdog_thresh",
2383 		.data		= &watchdog_thresh,
2384 		.maxlen		= sizeof(int),
2385 		.mode		= 0644,
2386 		.proc_handler	= proc_watchdog_thresh,
2387 		.extra1		= SYSCTL_ZERO,
2388 		.extra2		= &sixty,
2389 	},
2390 	{
2391 		.procname       = "nmi_watchdog",
2392 		.data		= &nmi_watchdog_user_enabled,
2393 		.maxlen		= sizeof(int),
2394 		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
2395 		.proc_handler   = proc_nmi_watchdog,
2396 		.extra1		= SYSCTL_ZERO,
2397 		.extra2		= SYSCTL_ONE,
2398 	},
2399 	{
2400 		.procname	= "watchdog_cpumask",
2401 		.data		= &watchdog_cpumask_bits,
2402 		.maxlen		= NR_CPUS,
2403 		.mode		= 0644,
2404 		.proc_handler	= proc_watchdog_cpumask,
2405 	},
2406 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
2407 	{
2408 		.procname       = "soft_watchdog",
2409 		.data		= &soft_watchdog_user_enabled,
2410 		.maxlen		= sizeof(int),
2411 		.mode		= 0644,
2412 		.proc_handler   = proc_soft_watchdog,
2413 		.extra1		= SYSCTL_ZERO,
2414 		.extra2		= SYSCTL_ONE,
2415 	},
2416 	{
2417 		.procname	= "softlockup_panic",
2418 		.data		= &softlockup_panic,
2419 		.maxlen		= sizeof(int),
2420 		.mode		= 0644,
2421 		.proc_handler	= proc_dointvec_minmax,
2422 		.extra1		= SYSCTL_ZERO,
2423 		.extra2		= SYSCTL_ONE,
2424 	},
2425 #ifdef CONFIG_SMP
2426 	{
2427 		.procname	= "softlockup_all_cpu_backtrace",
2428 		.data		= &sysctl_softlockup_all_cpu_backtrace,
2429 		.maxlen		= sizeof(int),
2430 		.mode		= 0644,
2431 		.proc_handler	= proc_dointvec_minmax,
2432 		.extra1		= SYSCTL_ZERO,
2433 		.extra2		= SYSCTL_ONE,
2434 	},
2435 #endif /* CONFIG_SMP */
2436 #endif
2437 #ifdef CONFIG_HARDLOCKUP_DETECTOR
2438 	{
2439 		.procname	= "hardlockup_panic",
2440 		.data		= &hardlockup_panic,
2441 		.maxlen		= sizeof(int),
2442 		.mode		= 0644,
2443 		.proc_handler	= proc_dointvec_minmax,
2444 		.extra1		= SYSCTL_ZERO,
2445 		.extra2		= SYSCTL_ONE,
2446 	},
2447 #ifdef CONFIG_SMP
2448 	{
2449 		.procname	= "hardlockup_all_cpu_backtrace",
2450 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
2451 		.maxlen		= sizeof(int),
2452 		.mode		= 0644,
2453 		.proc_handler	= proc_dointvec_minmax,
2454 		.extra1		= SYSCTL_ZERO,
2455 		.extra2		= SYSCTL_ONE,
2456 	},
2457 #endif /* CONFIG_SMP */
2458 #endif
2459 #endif
2460 
2461 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
2462 	{
2463 		.procname       = "unknown_nmi_panic",
2464 		.data           = &unknown_nmi_panic,
2465 		.maxlen         = sizeof (int),
2466 		.mode           = 0644,
2467 		.proc_handler   = proc_dointvec,
2468 	},
2469 #endif
2470 
2471 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
2472 	defined(CONFIG_DEBUG_STACKOVERFLOW)
2473 	{
2474 		.procname	= "panic_on_stackoverflow",
2475 		.data		= &sysctl_panic_on_stackoverflow,
2476 		.maxlen		= sizeof(int),
2477 		.mode		= 0644,
2478 		.proc_handler	= proc_dointvec,
2479 	},
2480 #endif
2481 #if defined(CONFIG_X86)
2482 	{
2483 		.procname	= "panic_on_unrecovered_nmi",
2484 		.data		= &panic_on_unrecovered_nmi,
2485 		.maxlen		= sizeof(int),
2486 		.mode		= 0644,
2487 		.proc_handler	= proc_dointvec,
2488 	},
2489 	{
2490 		.procname	= "panic_on_io_nmi",
2491 		.data		= &panic_on_io_nmi,
2492 		.maxlen		= sizeof(int),
2493 		.mode		= 0644,
2494 		.proc_handler	= proc_dointvec,
2495 	},
2496 	{
2497 		.procname	= "bootloader_type",
2498 		.data		= &bootloader_type,
2499 		.maxlen		= sizeof (int),
2500 		.mode		= 0444,
2501 		.proc_handler	= proc_dointvec,
2502 	},
2503 	{
2504 		.procname	= "bootloader_version",
2505 		.data		= &bootloader_version,
2506 		.maxlen		= sizeof (int),
2507 		.mode		= 0444,
2508 		.proc_handler	= proc_dointvec,
2509 	},
2510 	{
2511 		.procname	= "io_delay_type",
2512 		.data		= &io_delay_type,
2513 		.maxlen		= sizeof(int),
2514 		.mode		= 0644,
2515 		.proc_handler	= proc_dointvec,
2516 	},
2517 #endif
2518 #if defined(CONFIG_MMU)
2519 	{
2520 		.procname	= "randomize_va_space",
2521 		.data		= &randomize_va_space,
2522 		.maxlen		= sizeof(int),
2523 		.mode		= 0644,
2524 		.proc_handler	= proc_dointvec,
2525 	},
2526 #endif
2527 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
2528 	{
2529 		.procname	= "spin_retry",
2530 		.data		= &spin_retry,
2531 		.maxlen		= sizeof (int),
2532 		.mode		= 0644,
2533 		.proc_handler	= proc_dointvec,
2534 	},
2535 #endif
2536 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
2537 	{
2538 		.procname	= "acpi_video_flags",
2539 		.data		= &acpi_realmode_flags,
2540 		.maxlen		= sizeof (unsigned long),
2541 		.mode		= 0644,
2542 		.proc_handler	= proc_doulongvec_minmax,
2543 	},
2544 #endif
2545 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
2546 	{
2547 		.procname	= "ignore-unaligned-usertrap",
2548 		.data		= &no_unaligned_warning,
2549 		.maxlen		= sizeof (int),
2550 		.mode		= 0644,
2551 		.proc_handler	= proc_dointvec,
2552 	},
2553 #endif
2554 #ifdef CONFIG_IA64
2555 	{
2556 		.procname	= "unaligned-dump-stack",
2557 		.data		= &unaligned_dump_stack,
2558 		.maxlen		= sizeof (int),
2559 		.mode		= 0644,
2560 		.proc_handler	= proc_dointvec,
2561 	},
2562 #endif
2563 #ifdef CONFIG_DETECT_HUNG_TASK
2564 #ifdef CONFIG_SMP
2565 	{
2566 		.procname	= "hung_task_all_cpu_backtrace",
2567 		.data		= &sysctl_hung_task_all_cpu_backtrace,
2568 		.maxlen		= sizeof(int),
2569 		.mode		= 0644,
2570 		.proc_handler	= proc_dointvec_minmax,
2571 		.extra1		= SYSCTL_ZERO,
2572 		.extra2		= SYSCTL_ONE,
2573 	},
2574 #endif /* CONFIG_SMP */
2575 	{
2576 		.procname	= "hung_task_panic",
2577 		.data		= &sysctl_hung_task_panic,
2578 		.maxlen		= sizeof(int),
2579 		.mode		= 0644,
2580 		.proc_handler	= proc_dointvec_minmax,
2581 		.extra1		= SYSCTL_ZERO,
2582 		.extra2		= SYSCTL_ONE,
2583 	},
2584 	{
2585 		.procname	= "hung_task_check_count",
2586 		.data		= &sysctl_hung_task_check_count,
2587 		.maxlen		= sizeof(int),
2588 		.mode		= 0644,
2589 		.proc_handler	= proc_dointvec_minmax,
2590 		.extra1		= SYSCTL_ZERO,
2591 	},
2592 	{
2593 		.procname	= "hung_task_timeout_secs",
2594 		.data		= &sysctl_hung_task_timeout_secs,
2595 		.maxlen		= sizeof(unsigned long),
2596 		.mode		= 0644,
2597 		.proc_handler	= proc_dohung_task_timeout_secs,
2598 		.extra2		= &hung_task_timeout_max,
2599 	},
2600 	{
2601 		.procname	= "hung_task_check_interval_secs",
2602 		.data		= &sysctl_hung_task_check_interval_secs,
2603 		.maxlen		= sizeof(unsigned long),
2604 		.mode		= 0644,
2605 		.proc_handler	= proc_dohung_task_timeout_secs,
2606 		.extra2		= &hung_task_timeout_max,
2607 	},
2608 	{
2609 		.procname	= "hung_task_warnings",
2610 		.data		= &sysctl_hung_task_warnings,
2611 		.maxlen		= sizeof(int),
2612 		.mode		= 0644,
2613 		.proc_handler	= proc_dointvec_minmax,
2614 		.extra1		= SYSCTL_NEG_ONE,
2615 	},
2616 #endif
2617 #ifdef CONFIG_RT_MUTEXES
2618 	{
2619 		.procname	= "max_lock_depth",
2620 		.data		= &max_lock_depth,
2621 		.maxlen		= sizeof(int),
2622 		.mode		= 0644,
2623 		.proc_handler	= proc_dointvec,
2624 	},
2625 #endif
2626 	{
2627 		.procname	= "poweroff_cmd",
2628 		.data		= &poweroff_cmd,
2629 		.maxlen		= POWEROFF_CMD_PATH_LEN,
2630 		.mode		= 0644,
2631 		.proc_handler	= proc_dostring,
2632 	},
2633 #ifdef CONFIG_KEYS
2634 	{
2635 		.procname	= "keys",
2636 		.mode		= 0555,
2637 		.child		= key_sysctls,
2638 	},
2639 #endif
2640 #ifdef CONFIG_PERF_EVENTS
2641 	/*
2642 	 * User-space scripts rely on the existence of this file
2643 	 * as a feature check for perf_events being enabled.
2644 	 *
2645 	 * So it's an ABI, do not remove!
2646 	 */
2647 	{
2648 		.procname	= "perf_event_paranoid",
2649 		.data		= &sysctl_perf_event_paranoid,
2650 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
2651 		.mode		= 0644,
2652 		.proc_handler	= proc_dointvec,
2653 	},
2654 	{
2655 		.procname	= "perf_event_mlock_kb",
2656 		.data		= &sysctl_perf_event_mlock,
2657 		.maxlen		= sizeof(sysctl_perf_event_mlock),
2658 		.mode		= 0644,
2659 		.proc_handler	= proc_dointvec,
2660 	},
2661 	{
2662 		.procname	= "perf_event_max_sample_rate",
2663 		.data		= &sysctl_perf_event_sample_rate,
2664 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
2665 		.mode		= 0644,
2666 		.proc_handler	= perf_proc_update_handler,
2667 		.extra1		= SYSCTL_ONE,
2668 	},
2669 	{
2670 		.procname	= "perf_cpu_time_max_percent",
2671 		.data		= &sysctl_perf_cpu_time_max_percent,
2672 		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
2673 		.mode		= 0644,
2674 		.proc_handler	= perf_cpu_time_max_percent_handler,
2675 		.extra1		= SYSCTL_ZERO,
2676 		.extra2		= SYSCTL_ONE_HUNDRED,
2677 	},
2678 	{
2679 		.procname	= "perf_event_max_stack",
2680 		.data		= &sysctl_perf_event_max_stack,
2681 		.maxlen		= sizeof(sysctl_perf_event_max_stack),
2682 		.mode		= 0644,
2683 		.proc_handler	= perf_event_max_stack_handler,
2684 		.extra1		= SYSCTL_ZERO,
2685 		.extra2		= &six_hundred_forty_kb,
2686 	},
2687 	{
2688 		.procname	= "perf_event_max_contexts_per_stack",
2689 		.data		= &sysctl_perf_event_max_contexts_per_stack,
2690 		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
2691 		.mode		= 0644,
2692 		.proc_handler	= perf_event_max_stack_handler,
2693 		.extra1		= SYSCTL_ZERO,
2694 		.extra2		= SYSCTL_ONE_THOUSAND,
2695 	},
2696 #endif
2697 	{
2698 		.procname	= "panic_on_warn",
2699 		.data		= &panic_on_warn,
2700 		.maxlen		= sizeof(int),
2701 		.mode		= 0644,
2702 		.proc_handler	= proc_dointvec_minmax,
2703 		.extra1		= SYSCTL_ZERO,
2704 		.extra2		= SYSCTL_ONE,
2705 	},
2706 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
2707 	{
2708 		.procname	= "timer_migration",
2709 		.data		= &sysctl_timer_migration,
2710 		.maxlen		= sizeof(unsigned int),
2711 		.mode		= 0644,
2712 		.proc_handler	= timer_migration_handler,
2713 		.extra1		= SYSCTL_ZERO,
2714 		.extra2		= SYSCTL_ONE,
2715 	},
2716 #endif
2717 #ifdef CONFIG_BPF_SYSCALL
2718 	{
2719 		.procname	= "unprivileged_bpf_disabled",
2720 		.data		= &sysctl_unprivileged_bpf_disabled,
2721 		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
2722 		.mode		= 0644,
2723 		.proc_handler	= bpf_unpriv_handler,
2724 		.extra1		= SYSCTL_ZERO,
2725 		.extra2		= SYSCTL_TWO,
2726 	},
2727 	{
2728 		.procname	= "bpf_stats_enabled",
2729 		.data		= &bpf_stats_enabled_key.key,
2730 		.maxlen		= sizeof(bpf_stats_enabled_key),
2731 		.mode		= 0644,
2732 		.proc_handler	= bpf_stats_handler,
2733 	},
2734 #endif
2735 #if defined(CONFIG_TREE_RCU)
2736 	{
2737 		.procname	= "panic_on_rcu_stall",
2738 		.data		= &sysctl_panic_on_rcu_stall,
2739 		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
2740 		.mode		= 0644,
2741 		.proc_handler	= proc_dointvec_minmax,
2742 		.extra1		= SYSCTL_ZERO,
2743 		.extra2		= SYSCTL_ONE,
2744 	},
2745 #endif
2746 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
2747 	{
2748 		.procname	= "stack_erasing",
2749 		.data		= NULL,
2750 		.maxlen		= sizeof(int),
2751 		.mode		= 0600,
2752 		.proc_handler	= stack_erasing_sysctl,
2753 		.extra1		= SYSCTL_ZERO,
2754 		.extra2		= SYSCTL_ONE,
2755 	},
2756 #endif
2757 	{ }
2758 };
2759 
2760 static struct ctl_table vm_table[] = {
2761 	{
2762 		.procname	= "overcommit_memory",
2763 		.data		= &sysctl_overcommit_memory,
2764 		.maxlen		= sizeof(sysctl_overcommit_memory),
2765 		.mode		= 0644,
2766 		.proc_handler	= overcommit_policy_handler,
2767 		.extra1		= SYSCTL_ZERO,
2768 		.extra2		= SYSCTL_TWO,
2769 	},
2770 	{
2771 		.procname	= "panic_on_oom",
2772 		.data		= &sysctl_panic_on_oom,
2773 		.maxlen		= sizeof(sysctl_panic_on_oom),
2774 		.mode		= 0644,
2775 		.proc_handler	= proc_dointvec_minmax,
2776 		.extra1		= SYSCTL_ZERO,
2777 		.extra2		= SYSCTL_TWO,
2778 	},
2779 	{
2780 		.procname	= "oom_kill_allocating_task",
2781 		.data		= &sysctl_oom_kill_allocating_task,
2782 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
2783 		.mode		= 0644,
2784 		.proc_handler	= proc_dointvec,
2785 	},
2786 	{
2787 		.procname	= "oom_dump_tasks",
2788 		.data		= &sysctl_oom_dump_tasks,
2789 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
2790 		.mode		= 0644,
2791 		.proc_handler	= proc_dointvec,
2792 	},
2793 	{
2794 		.procname	= "overcommit_ratio",
2795 		.data		= &sysctl_overcommit_ratio,
2796 		.maxlen		= sizeof(sysctl_overcommit_ratio),
2797 		.mode		= 0644,
2798 		.proc_handler	= overcommit_ratio_handler,
2799 	},
2800 	{
2801 		.procname	= "overcommit_kbytes",
2802 		.data		= &sysctl_overcommit_kbytes,
2803 		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2804 		.mode		= 0644,
2805 		.proc_handler	= overcommit_kbytes_handler,
2806 	},
2807 	{
2808 		.procname	= "page-cluster",
2809 		.data		= &page_cluster,
2810 		.maxlen		= sizeof(int),
2811 		.mode		= 0644,
2812 		.proc_handler	= proc_dointvec_minmax,
2813 		.extra1		= SYSCTL_ZERO,
2814 	},
2815 	{
2816 		.procname	= "dirty_background_ratio",
2817 		.data		= &dirty_background_ratio,
2818 		.maxlen		= sizeof(dirty_background_ratio),
2819 		.mode		= 0644,
2820 		.proc_handler	= dirty_background_ratio_handler,
2821 		.extra1		= SYSCTL_ZERO,
2822 		.extra2		= SYSCTL_ONE_HUNDRED,
2823 	},
2824 	{
2825 		.procname	= "dirty_background_bytes",
2826 		.data		= &dirty_background_bytes,
2827 		.maxlen		= sizeof(dirty_background_bytes),
2828 		.mode		= 0644,
2829 		.proc_handler	= dirty_background_bytes_handler,
2830 		.extra1		= &one_ul,
2831 	},
2832 	{
2833 		.procname	= "dirty_ratio",
2834 		.data		= &vm_dirty_ratio,
2835 		.maxlen		= sizeof(vm_dirty_ratio),
2836 		.mode		= 0644,
2837 		.proc_handler	= dirty_ratio_handler,
2838 		.extra1		= SYSCTL_ZERO,
2839 		.extra2		= SYSCTL_ONE_HUNDRED,
2840 	},
2841 	{
2842 		.procname	= "dirty_bytes",
2843 		.data		= &vm_dirty_bytes,
2844 		.maxlen		= sizeof(vm_dirty_bytes),
2845 		.mode		= 0644,
2846 		.proc_handler	= dirty_bytes_handler,
2847 		.extra1		= &dirty_bytes_min,
2848 	},
2849 	{
2850 		.procname	= "dirty_writeback_centisecs",
2851 		.data		= &dirty_writeback_interval,
2852 		.maxlen		= sizeof(dirty_writeback_interval),
2853 		.mode		= 0644,
2854 		.proc_handler	= dirty_writeback_centisecs_handler,
2855 	},
2856 	{
2857 		.procname	= "dirty_expire_centisecs",
2858 		.data		= &dirty_expire_interval,
2859 		.maxlen		= sizeof(dirty_expire_interval),
2860 		.mode		= 0644,
2861 		.proc_handler	= proc_dointvec_minmax,
2862 		.extra1		= SYSCTL_ZERO,
2863 	},
2864 	{
2865 		.procname	= "dirtytime_expire_seconds",
2866 		.data		= &dirtytime_expire_interval,
2867 		.maxlen		= sizeof(dirtytime_expire_interval),
2868 		.mode		= 0644,
2869 		.proc_handler	= dirtytime_interval_handler,
2870 		.extra1		= SYSCTL_ZERO,
2871 	},
2872 	{
2873 		.procname	= "swappiness",
2874 		.data		= &vm_swappiness,
2875 		.maxlen		= sizeof(vm_swappiness),
2876 		.mode		= 0644,
2877 		.proc_handler	= proc_dointvec_minmax,
2878 		.extra1		= SYSCTL_ZERO,
2879 		.extra2		= SYSCTL_TWO_HUNDRED,
2880 	},
2881 #ifdef CONFIG_NUMA
2882 	{
2883 		.procname	= "numa_stat",
2884 		.data		= &sysctl_vm_numa_stat,
2885 		.maxlen		= sizeof(int),
2886 		.mode		= 0644,
2887 		.proc_handler	= sysctl_vm_numa_stat_handler,
2888 		.extra1		= SYSCTL_ZERO,
2889 		.extra2		= SYSCTL_ONE,
2890 	},
2891 #endif
2892 #ifdef CONFIG_HUGETLB_PAGE
2893 	{
2894 		.procname	= "nr_hugepages",
2895 		.data		= NULL,
2896 		.maxlen		= sizeof(unsigned long),
2897 		.mode		= 0644,
2898 		.proc_handler	= hugetlb_sysctl_handler,
2899 	},
2900 #ifdef CONFIG_NUMA
2901 	{
2902 		.procname       = "nr_hugepages_mempolicy",
2903 		.data           = NULL,
2904 		.maxlen         = sizeof(unsigned long),
2905 		.mode           = 0644,
2906 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2907 	},
2908 #endif
2909 	 {
2910 		.procname	= "hugetlb_shm_group",
2911 		.data		= &sysctl_hugetlb_shm_group,
2912 		.maxlen		= sizeof(gid_t),
2913 		.mode		= 0644,
2914 		.proc_handler	= proc_dointvec,
2915 	 },
2916 	{
2917 		.procname	= "nr_overcommit_hugepages",
2918 		.data		= NULL,
2919 		.maxlen		= sizeof(unsigned long),
2920 		.mode		= 0644,
2921 		.proc_handler	= hugetlb_overcommit_handler,
2922 	},
2923 #endif
2924 	{
2925 		.procname	= "lowmem_reserve_ratio",
2926 		.data		= &sysctl_lowmem_reserve_ratio,
2927 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
2928 		.mode		= 0644,
2929 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
2930 	},
2931 	{
2932 		.procname	= "drop_caches",
2933 		.data		= &sysctl_drop_caches,
2934 		.maxlen		= sizeof(int),
2935 		.mode		= 0200,
2936 		.proc_handler	= drop_caches_sysctl_handler,
2937 		.extra1		= SYSCTL_ONE,
2938 		.extra2		= SYSCTL_FOUR,
2939 	},
2940 #ifdef CONFIG_COMPACTION
2941 	{
2942 		.procname	= "compact_memory",
2943 		.data		= &sysctl_compact_memory,
2944 		.maxlen		= sizeof(int),
2945 		.mode		= 0200,
2946 		.proc_handler	= sysctl_compaction_handler,
2947 	},
2948 	{
2949 		.procname	= "compaction_proactiveness",
2950 		.data		= &sysctl_compaction_proactiveness,
2951 		.maxlen		= sizeof(sysctl_compaction_proactiveness),
2952 		.mode		= 0644,
2953 		.proc_handler	= compaction_proactiveness_sysctl_handler,
2954 		.extra1		= SYSCTL_ZERO,
2955 		.extra2		= SYSCTL_ONE_HUNDRED,
2956 	},
2957 	{
2958 		.procname	= "extfrag_threshold",
2959 		.data		= &sysctl_extfrag_threshold,
2960 		.maxlen		= sizeof(int),
2961 		.mode		= 0644,
2962 		.proc_handler	= proc_dointvec_minmax,
2963 		.extra1		= &min_extfrag_threshold,
2964 		.extra2		= &max_extfrag_threshold,
2965 	},
2966 	{
2967 		.procname	= "compact_unevictable_allowed",
2968 		.data		= &sysctl_compact_unevictable_allowed,
2969 		.maxlen		= sizeof(int),
2970 		.mode		= 0644,
2971 		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
2972 		.extra1		= SYSCTL_ZERO,
2973 		.extra2		= SYSCTL_ONE,
2974 	},
2975 
2976 #endif /* CONFIG_COMPACTION */
2977 	{
2978 		.procname	= "min_free_kbytes",
2979 		.data		= &min_free_kbytes,
2980 		.maxlen		= sizeof(min_free_kbytes),
2981 		.mode		= 0644,
2982 		.proc_handler	= min_free_kbytes_sysctl_handler,
2983 		.extra1		= SYSCTL_ZERO,
2984 	},
2985 	{
2986 		.procname	= "watermark_boost_factor",
2987 		.data		= &watermark_boost_factor,
2988 		.maxlen		= sizeof(watermark_boost_factor),
2989 		.mode		= 0644,
2990 		.proc_handler	= proc_dointvec_minmax,
2991 		.extra1		= SYSCTL_ZERO,
2992 	},
2993 	{
2994 		.procname	= "watermark_scale_factor",
2995 		.data		= &watermark_scale_factor,
2996 		.maxlen		= sizeof(watermark_scale_factor),
2997 		.mode		= 0644,
2998 		.proc_handler	= watermark_scale_factor_sysctl_handler,
2999 		.extra1		= SYSCTL_ONE,
3000 		.extra2		= SYSCTL_THREE_THOUSAND,
3001 	},
3002 	{
3003 		.procname	= "extra_free_kbytes",
3004 		.data		= &extra_free_kbytes,
3005 		.maxlen		= sizeof(extra_free_kbytes),
3006 		.mode		= 0644,
3007 		.proc_handler	= min_free_kbytes_sysctl_handler,
3008 		.extra1		= SYSCTL_ZERO,
3009 	},
3010 	{
3011 		.procname	= "percpu_pagelist_fraction",
3012 		.data		= &percpu_pagelist_fraction,
3013 		.maxlen		= sizeof(percpu_pagelist_fraction),
3014 		.mode		= 0644,
3015 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
3016 		.extra1		= SYSCTL_ZERO,
3017 	},
3018 	{
3019 		.procname	= "page_lock_unfairness",
3020 		.data		= &sysctl_page_lock_unfairness,
3021 		.maxlen		= sizeof(sysctl_page_lock_unfairness),
3022 		.mode		= 0644,
3023 		.proc_handler	= proc_dointvec_minmax,
3024 		.extra1		= SYSCTL_ZERO,
3025 	},
3026 #ifdef CONFIG_MMU
3027 	{
3028 		.procname	= "max_map_count",
3029 		.data		= &sysctl_max_map_count,
3030 		.maxlen		= sizeof(sysctl_max_map_count),
3031 		.mode		= 0644,
3032 		.proc_handler	= proc_dointvec_minmax,
3033 		.extra1		= SYSCTL_ZERO,
3034 	},
3035 #else
3036 	{
3037 		.procname	= "nr_trim_pages",
3038 		.data		= &sysctl_nr_trim_pages,
3039 		.maxlen		= sizeof(sysctl_nr_trim_pages),
3040 		.mode		= 0644,
3041 		.proc_handler	= proc_dointvec_minmax,
3042 		.extra1		= SYSCTL_ZERO,
3043 	},
3044 #endif
3045 	{
3046 		.procname	= "laptop_mode",
3047 		.data		= &laptop_mode,
3048 		.maxlen		= sizeof(laptop_mode),
3049 		.mode		= 0644,
3050 		.proc_handler	= proc_dointvec_jiffies,
3051 	},
3052 	{
3053 		.procname	= "block_dump",
3054 		.data		= &block_dump,
3055 		.maxlen		= sizeof(block_dump),
3056 		.mode		= 0644,
3057 		.proc_handler	= proc_dointvec_minmax,
3058 		.extra1		= SYSCTL_ZERO,
3059 	},
3060 	{
3061 		.procname	= "vfs_cache_pressure",
3062 		.data		= &sysctl_vfs_cache_pressure,
3063 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
3064 		.mode		= 0644,
3065 		.proc_handler	= proc_dointvec_minmax,
3066 		.extra1		= SYSCTL_ZERO,
3067 	},
3068 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
3069     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
3070 	{
3071 		.procname	= "legacy_va_layout",
3072 		.data		= &sysctl_legacy_va_layout,
3073 		.maxlen		= sizeof(sysctl_legacy_va_layout),
3074 		.mode		= 0644,
3075 		.proc_handler	= proc_dointvec_minmax,
3076 		.extra1		= SYSCTL_ZERO,
3077 	},
3078 #endif
3079 #ifdef CONFIG_NUMA
3080 	{
3081 		.procname	= "zone_reclaim_mode",
3082 		.data		= &node_reclaim_mode,
3083 		.maxlen		= sizeof(node_reclaim_mode),
3084 		.mode		= 0644,
3085 		.proc_handler	= proc_dointvec_minmax,
3086 		.extra1		= SYSCTL_ZERO,
3087 	},
3088 	{
3089 		.procname	= "min_unmapped_ratio",
3090 		.data		= &sysctl_min_unmapped_ratio,
3091 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
3092 		.mode		= 0644,
3093 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
3094 		.extra1		= SYSCTL_ZERO,
3095 		.extra2		= SYSCTL_ONE_HUNDRED,
3096 	},
3097 	{
3098 		.procname	= "min_slab_ratio",
3099 		.data		= &sysctl_min_slab_ratio,
3100 		.maxlen		= sizeof(sysctl_min_slab_ratio),
3101 		.mode		= 0644,
3102 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
3103 		.extra1		= SYSCTL_ZERO,
3104 		.extra2		= SYSCTL_ONE_HUNDRED,
3105 	},
3106 #endif
3107 #ifdef CONFIG_SMP
3108 	{
3109 		.procname	= "stat_interval",
3110 		.data		= &sysctl_stat_interval,
3111 		.maxlen		= sizeof(sysctl_stat_interval),
3112 		.mode		= 0644,
3113 		.proc_handler	= proc_dointvec_jiffies,
3114 	},
3115 	{
3116 		.procname	= "stat_refresh",
3117 		.data		= NULL,
3118 		.maxlen		= 0,
3119 		.mode		= 0600,
3120 		.proc_handler	= vmstat_refresh,
3121 	},
3122 #endif
3123 #ifdef CONFIG_MMU
3124 	{
3125 		.procname	= "mmap_min_addr",
3126 		.data		= &dac_mmap_min_addr,
3127 		.maxlen		= sizeof(unsigned long),
3128 		.mode		= 0644,
3129 		.proc_handler	= mmap_min_addr_handler,
3130 	},
3131 #endif
3132 #ifdef CONFIG_NUMA
3133 	{
3134 		.procname	= "numa_zonelist_order",
3135 		.data		= &numa_zonelist_order,
3136 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
3137 		.mode		= 0644,
3138 		.proc_handler	= numa_zonelist_order_handler,
3139 	},
3140 #endif
3141 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
3142    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
3143 	{
3144 		.procname	= "vdso_enabled",
3145 #ifdef CONFIG_X86_32
3146 		.data		= &vdso32_enabled,
3147 		.maxlen		= sizeof(vdso32_enabled),
3148 #else
3149 		.data		= &vdso_enabled,
3150 		.maxlen		= sizeof(vdso_enabled),
3151 #endif
3152 		.mode		= 0644,
3153 		.proc_handler	= proc_dointvec,
3154 		.extra1		= SYSCTL_ZERO,
3155 	},
3156 #endif
3157 #ifdef CONFIG_HIGHMEM
3158 	{
3159 		.procname	= "highmem_is_dirtyable",
3160 		.data		= &vm_highmem_is_dirtyable,
3161 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
3162 		.mode		= 0644,
3163 		.proc_handler	= proc_dointvec_minmax,
3164 		.extra1		= SYSCTL_ZERO,
3165 		.extra2		= SYSCTL_ONE,
3166 	},
3167 #endif
3168 #ifdef CONFIG_MEMORY_FAILURE
3169 	{
3170 		.procname	= "memory_failure_early_kill",
3171 		.data		= &sysctl_memory_failure_early_kill,
3172 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
3173 		.mode		= 0644,
3174 		.proc_handler	= proc_dointvec_minmax,
3175 		.extra1		= SYSCTL_ZERO,
3176 		.extra2		= SYSCTL_ONE,
3177 	},
3178 	{
3179 		.procname	= "memory_failure_recovery",
3180 		.data		= &sysctl_memory_failure_recovery,
3181 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
3182 		.mode		= 0644,
3183 		.proc_handler	= proc_dointvec_minmax,
3184 		.extra1		= SYSCTL_ZERO,
3185 		.extra2		= SYSCTL_ONE,
3186 	},
3187 #endif
3188 	{
3189 		.procname	= "user_reserve_kbytes",
3190 		.data		= &sysctl_user_reserve_kbytes,
3191 		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
3192 		.mode		= 0644,
3193 		.proc_handler	= proc_doulongvec_minmax,
3194 	},
3195 	{
3196 		.procname	= "admin_reserve_kbytes",
3197 		.data		= &sysctl_admin_reserve_kbytes,
3198 		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
3199 		.mode		= 0644,
3200 		.proc_handler	= proc_doulongvec_minmax,
3201 	},
3202 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
3203 	{
3204 		.procname	= "mmap_rnd_bits",
3205 		.data		= &mmap_rnd_bits,
3206 		.maxlen		= sizeof(mmap_rnd_bits),
3207 		.mode		= 0600,
3208 		.proc_handler	= proc_dointvec_minmax,
3209 		.extra1		= (void *)&mmap_rnd_bits_min,
3210 		.extra2		= (void *)&mmap_rnd_bits_max,
3211 	},
3212 #endif
3213 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
3214 	{
3215 		.procname	= "mmap_rnd_compat_bits",
3216 		.data		= &mmap_rnd_compat_bits,
3217 		.maxlen		= sizeof(mmap_rnd_compat_bits),
3218 		.mode		= 0600,
3219 		.proc_handler	= proc_dointvec_minmax,
3220 		.extra1		= (void *)&mmap_rnd_compat_bits_min,
3221 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
3222 	},
3223 #endif
3224 #ifdef CONFIG_USERFAULTFD
3225 	{
3226 		.procname	= "unprivileged_userfaultfd",
3227 		.data		= &sysctl_unprivileged_userfaultfd,
3228 		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
3229 		.mode		= 0644,
3230 		.proc_handler	= proc_dointvec_minmax,
3231 		.extra1		= SYSCTL_ZERO,
3232 		.extra2		= SYSCTL_ONE,
3233 	},
3234 #endif
3235 	{ }
3236 };
3237 
/*
 * fs_table - sysctl entries grouped under the "fs" directory.
 *
 * sysctl_base_table points at this array from its "fs" entry.  Each element
 * names one file (or, when .child is set, one subdirectory), the variable it
 * exposes (.data/.maxlen), its permission bits (.mode) and the handler used
 * to read/write it (.proc_handler).  For the *_minmax handlers .extra1 and
 * .extra2 supply the lower and upper bound.  Several entries are compiled in
 * only when the corresponding CONFIG_* option is defined.
 */
3238 static struct ctl_table fs_table[] = {
	/* Read-only (0444): 2 * sizeof(long) bytes of inodes_stat. */
3239 	{
3240 		.procname	= "inode-nr",
3241 		.data		= &inodes_stat,
3242 		.maxlen		= 2*sizeof(long),
3243 		.mode		= 0444,
3244 		.proc_handler	= proc_nr_inodes,
3245 	},
	/* Same object and handler as "inode-nr", but 7 * sizeof(long) bytes. */
3246 	{
3247 		.procname	= "inode-state",
3248 		.data		= &inodes_stat,
3249 		.maxlen		= 7*sizeof(long),
3250 		.mode		= 0444,
3251 		.proc_handler	= proc_nr_inodes,
3252 	},
	/* Read-only view of the whole files_stat object. */
3253 	{
3254 		.procname	= "file-nr",
3255 		.data		= &files_stat,
3256 		.maxlen		= sizeof(files_stat),
3257 		.mode		= 0444,
3258 		.proc_handler	= proc_nr_files,
3259 	},
	/* Writable files_stat.max_files, bounded by zero_ul and long_max. */
3260 	{
3261 		.procname	= "file-max",
3262 		.data		= &files_stat.max_files,
3263 		.maxlen		= sizeof(files_stat.max_files),
3264 		.mode		= 0644,
3265 		.proc_handler	= proc_doulongvec_minmax,
3266 		.extra1		= &zero_ul,
3267 		.extra2		= &long_max,
3268 	},
	/*
	 * Bounded by sysctl_nr_open_min / sysctl_nr_open_max.
	 * NOTE(review): .maxlen is sizeof(unsigned int) yet the handler is
	 * the int-vector proc_dointvec_minmax rather than
	 * proc_douintvec_minmax - confirm this is intended.
	 */
3269 	{
3270 		.procname	= "nr_open",
3271 		.data		= &sysctl_nr_open,
3272 		.maxlen		= sizeof(unsigned int),
3273 		.mode		= 0644,
3274 		.proc_handler	= proc_dointvec_minmax,
3275 		.extra1		= &sysctl_nr_open_min,
3276 		.extra2		= &sysctl_nr_open_max,
3277 	},
	/* Read-only: 6 * sizeof(long) bytes of dentry_stat. */
3278 	{
3279 		.procname	= "dentry-state",
3280 		.data		= &dentry_stat,
3281 		.maxlen		= 6*sizeof(long),
3282 		.mode		= 0444,
3283 		.proc_handler	= proc_nr_dentry,
3284 	},
	/* overflowuid and overflowgid share the minolduid..maxolduid bounds. */
3285 	{
3286 		.procname	= "overflowuid",
3287 		.data		= &fs_overflowuid,
3288 		.maxlen		= sizeof(int),
3289 		.mode		= 0644,
3290 		.proc_handler	= proc_dointvec_minmax,
3291 		.extra1		= &minolduid,
3292 		.extra2		= &maxolduid,
3293 	},
3294 	{
3295 		.procname	= "overflowgid",
3296 		.data		= &fs_overflowgid,
3297 		.maxlen		= sizeof(int),
3298 		.mode		= 0644,
3299 		.proc_handler	= proc_dointvec_minmax,
3300 		.extra1		= &minolduid,
3301 		.extra2		= &maxolduid,
3302 	},
	/* Plain ints via proc_dointvec; no .extra1/.extra2 bounds are set. */
3303 #ifdef CONFIG_FILE_LOCKING
3304 	{
3305 		.procname	= "leases-enable",
3306 		.data		= &leases_enable,
3307 		.maxlen		= sizeof(int),
3308 		.mode		= 0644,
3309 		.proc_handler	= proc_dointvec,
3310 	},
3311 #endif
3312 #ifdef CONFIG_DNOTIFY
3313 	{
3314 		.procname	= "dir-notify-enable",
3315 		.data		= &dir_notify_enable,
3316 		.maxlen		= sizeof(int),
3317 		.mode		= 0644,
3318 		.proc_handler	= proc_dointvec,
3319 	},
3320 #endif
	/*
	 * Everything down to the matching #endif needs CONFIG_MMU in addition
	 * to its own option: lease-break-time, the aio counters and the
	 * inotify/epoll subdirectories.
	 */
3321 #ifdef CONFIG_MMU
3322 #ifdef CONFIG_FILE_LOCKING
3323 	{
3324 		.procname	= "lease-break-time",
3325 		.data		= &lease_break_time,
3326 		.maxlen		= sizeof(int),
3327 		.mode		= 0644,
3328 		.proc_handler	= proc_dointvec,
3329 	},
3330 #endif
3331 #ifdef CONFIG_AIO
	/* aio-nr is read-only (0444); aio-max-nr is the writable counterpart. */
3332 	{
3333 		.procname	= "aio-nr",
3334 		.data		= &aio_nr,
3335 		.maxlen		= sizeof(aio_nr),
3336 		.mode		= 0444,
3337 		.proc_handler	= proc_doulongvec_minmax,
3338 	},
3339 	{
3340 		.procname	= "aio-max-nr",
3341 		.data		= &aio_max_nr,
3342 		.maxlen		= sizeof(aio_max_nr),
3343 		.mode		= 0644,
3344 		.proc_handler	= proc_doulongvec_minmax,
3345 	},
3346 #endif /* CONFIG_AIO */
	/* Subdirectories: no .data/.proc_handler, only a .child table. */
3347 #ifdef CONFIG_INOTIFY_USER
3348 	{
3349 		.procname	= "inotify",
3350 		.mode		= 0555,
3351 		.child		= inotify_table,
3352 	},
3353 #endif
3354 #ifdef CONFIG_EPOLL
3355 	{
3356 		.procname	= "epoll",
3357 		.mode		= 0555,
3358 		.child		= epoll_table,
3359 	},
3360 #endif
3361 #endif /* CONFIG_MMU */
	/*
	 * protected_* knobs: mode 0600.  symlinks/hardlinks accept 0..1,
	 * fifos/regular accept 0..2.
	 */
3362 	{
3363 		.procname	= "protected_symlinks",
3364 		.data		= &sysctl_protected_symlinks,
3365 		.maxlen		= sizeof(int),
3366 		.mode		= 0600,
3367 		.proc_handler	= proc_dointvec_minmax,
3368 		.extra1		= SYSCTL_ZERO,
3369 		.extra2		= SYSCTL_ONE,
3370 	},
3371 	{
3372 		.procname	= "protected_hardlinks",
3373 		.data		= &sysctl_protected_hardlinks,
3374 		.maxlen		= sizeof(int),
3375 		.mode		= 0600,
3376 		.proc_handler	= proc_dointvec_minmax,
3377 		.extra1		= SYSCTL_ZERO,
3378 		.extra2		= SYSCTL_ONE,
3379 	},
3380 	{
3381 		.procname	= "protected_fifos",
3382 		.data		= &sysctl_protected_fifos,
3383 		.maxlen		= sizeof(int),
3384 		.mode		= 0600,
3385 		.proc_handler	= proc_dointvec_minmax,
3386 		.extra1		= SYSCTL_ZERO,
3387 		.extra2		= SYSCTL_TWO,
3388 	},
3389 	{
3390 		.procname	= "protected_regular",
3391 		.data		= &sysctl_protected_regular,
3392 		.maxlen		= sizeof(int),
3393 		.mode		= 0600,
3394 		.proc_handler	= proc_dointvec_minmax,
3395 		.extra1		= SYSCTL_ZERO,
3396 		.extra2		= SYSCTL_TWO,
3397 	},
	/* Range 0..2, routed through the dedicated coredump handler. */
3398 	{
3399 		.procname	= "suid_dumpable",
3400 		.data		= &suid_dumpable,
3401 		.maxlen		= sizeof(int),
3402 		.mode		= 0644,
3403 		.proc_handler	= proc_dointvec_minmax_coredump,
3404 		.extra1		= SYSCTL_ZERO,
3405 		.extra2		= SYSCTL_TWO,
3406 	},
	/* Present whether binfmt_misc is built in or built as a module. */
3407 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
3408 	{
3409 		.procname	= "binfmt_misc",
3410 		.mode		= 0555,
3411 		.child		= sysctl_mount_point,
3412 	},
3413 #endif
	/* pipe-max-size has its own handler; the page limits are plain ulongs. */
3414 	{
3415 		.procname	= "pipe-max-size",
3416 		.data		= &pipe_max_size,
3417 		.maxlen		= sizeof(pipe_max_size),
3418 		.mode		= 0644,
3419 		.proc_handler	= proc_dopipe_max_size,
3420 	},
3421 	{
3422 		.procname	= "pipe-user-pages-hard",
3423 		.data		= &pipe_user_pages_hard,
3424 		.maxlen		= sizeof(pipe_user_pages_hard),
3425 		.mode		= 0644,
3426 		.proc_handler	= proc_doulongvec_minmax,
3427 	},
3428 	{
3429 		.procname	= "pipe-user-pages-soft",
3430 		.data		= &pipe_user_pages_soft,
3431 		.maxlen		= sizeof(pipe_user_pages_soft),
3432 		.mode		= 0644,
3433 		.proc_handler	= proc_doulongvec_minmax,
3434 	},
	/*
	 * Lower bound SYSCTL_ONE only; no .extra2 is given.
	 * NOTE(review): sized as unsigned int but parsed by
	 * proc_dointvec_minmax - confirm this is intended.
	 */
3435 	{
3436 		.procname	= "mount-max",
3437 		.data		= &sysctl_mount_max,
3438 		.maxlen		= sizeof(unsigned int),
3439 		.mode		= 0644,
3440 		.proc_handler	= proc_dointvec_minmax,
3441 		.extra1		= SYSCTL_ONE,
3442 	},
	/* Trailing empty entry, as at the end of every table in this file. */
3443 	{ }
3444 };
3445 
/*
 * debug_table - sysctl entries grouped under the "debug" directory.
 *
 * Referenced as the child of the "debug" entry in sysctl_base_table.  Both
 * entries are optional; with neither CONFIG option set the table holds only
 * its trailing empty entry.
 */
3446 static struct ctl_table debug_table[] = {
3447 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
	/* show_unhandled_signals as a plain int; no bounds are set. */
3448 	{
3449 		.procname	= "exception-trace",
3450 		.data		= &show_unhandled_signals,
3451 		.maxlen		= sizeof(int),
3452 		.mode		= 0644,
3453 		.proc_handler	= proc_dointvec
3454 	},
3455 #endif
3456 #if defined(CONFIG_OPTPROBES)
	/* Bounds 0..1 supplied; handled by a kprobes-specific handler. */
3457 	{
3458 		.procname	= "kprobes-optimization",
3459 		.data		= &sysctl_kprobes_optimization,
3460 		.maxlen		= sizeof(int),
3461 		.mode		= 0644,
3462 		.proc_handler	= proc_kprobes_optimization_handler,
3463 		.extra1		= SYSCTL_ZERO,
3464 		.extra2		= SYSCTL_ONE,
3465 	},
3466 #endif
3467 	{ }
3468 };
3469 
/*
 * dev_table - child table of the "dev" entry in sysctl_base_table.
 *
 * Holds nothing but the trailing empty entry, so this file contributes no
 * files to that directory.
 * NOTE(review): presumably kept so that other code can register entries
 * beneath "dev" - confirm.
 */
3470 static struct ctl_table dev_table[] = {
3471 	{ }
3472 };
3473 
/*
 * sysctl_base_table - top-level layout passed to register_sysctl_table()
 * by sysctl_init().
 *
 * Every entry here is a directory node: it carries a name, mode 0555 and a
 * .child table, and has no .data or .proc_handler of its own.
 */
3474 static struct ctl_table sysctl_base_table[] = {
3475 	{
3476 		.procname	= "kernel",
3477 		.mode		= 0555,
3478 		.child		= kern_table,
3479 	},
3480 	{
3481 		.procname	= "vm",
3482 		.mode		= 0555,
3483 		.child		= vm_table,
3484 	},
3485 	{
3486 		.procname	= "fs",
3487 		.mode		= 0555,
3488 		.child		= fs_table,
3489 	},
3490 	{
3491 		.procname	= "debug",
3492 		.mode		= 0555,
3493 		.child		= debug_table,
3494 	},
3495 	{
3496 		.procname	= "dev",
3497 		.mode		= 0555,
3498 		.child		= dev_table,
3499 	},
	/* Trailing empty entry, as at the end of every table in this file. */
3500 	{ }
3501 };
3502 
sysctl_init(void)3503 int __init sysctl_init(void)
3504 {
3505 	struct ctl_table_header *hdr;
3506 
3507 	hdr = register_sysctl_table(sysctl_base_table);
3508 	kmemleak_not_leak(hdr);
3509 	return 0;
3510 }
3511 #endif /* CONFIG_SYSCTL */
3512 /*
3513  * No sense putting this after each symbol definition, twice,
3514  * exception granted :-)
 *
 * The proc_do* handler exports are therefore collected here in one list,
 * placed after the CONFIG_SYSCTL #endif.  proc_douintvec_minmax is the only
 * one exported with EXPORT_SYMBOL_GPL; all others use plain EXPORT_SYMBOL.
 * NOTE(review): "twice" presumably refers to each handler being defined in
 * two places in this file - confirm.
3515  */
3516 EXPORT_SYMBOL(proc_dointvec);
3517 EXPORT_SYMBOL(proc_douintvec);
3518 EXPORT_SYMBOL(proc_dointvec_jiffies);
3519 EXPORT_SYMBOL(proc_dointvec_minmax);
3520 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
3521 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
3522 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
3523 EXPORT_SYMBOL(proc_dostring);
3524 EXPORT_SYMBOL(proc_doulongvec_minmax);
3525 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3526 EXPORT_SYMBOL(proc_do_large_bitmap);
3527