• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* drivers/misc/lowmemorykiller.c
2  *
3  * The lowmemorykiller driver lets user-space specify a set of memory thresholds
4  * where processes with a range of oom_score_adj values will get killed. Specify
5  * the minimum oom_score_adj values in
6  * /sys/module/lowmemorykiller/parameters/adj and the number of free pages in
7  * /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma
8  * separated list of numbers in ascending order.
9  *
10  * For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and
11  * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill
12  * processes with a oom_score_adj value of 8 or higher when the free memory
13  * drops below 4096 pages and kill processes with a oom_score_adj value of 0 or
14  * higher when the free memory drops below 1024 pages.
15  *
16  * The driver considers memory used for caches to be free, but if a large
17  * percentage of the cached memory is locked this can be very inaccurate
18  * and processes may not get killed until the normal oom killer is triggered.
19  *
20  * Copyright (C) 2007-2008 Google, Inc.
21  *
22  * This software is licensed under the terms of the GNU General Public
23  * License version 2, as published by the Free Software Foundation, and
24  * may be copied, distributed, and modified under those terms.
25  *
26  * This program is distributed in the hope that it will be useful,
27  * but WITHOUT ANY WARRANTY; without even the implied warranty of
28  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29  * GNU General Public License for more details.
30  *
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/init.h>
36 #include <linux/moduleparam.h>
37 #include <linux/kernel.h>
38 #include <linux/mm.h>
39 #include <linux/oom.h>
40 #include <linux/sched.h>
41 #include <linux/swap.h>
42 #include <linux/rcupdate.h>
43 #include <linux/profile.h>
44 #include <linux/notifier.h>
45 #include <linux/circ_buf.h>
46 #include <linux/proc_fs.h>
47 #include <linux/slab.h>
48 #include <linux/poll.h>
49 
50 #define CREATE_TRACE_POINTS
51 #include "trace/lowmemorykiller.h"
52 
53 static uint32_t lowmem_debug_level = 1;
54 static short lowmem_adj[6] = {
55 	0,
56 	1,
57 	6,
58 	12,
59 };
60 static int lowmem_adj_size = 4;
61 static int lowmem_minfree[6] = {
62 	3 * 512,	/* 6MB */
63 	2 * 1024,	/* 8MB */
64 	4 * 1024,	/* 16MB */
65 	16 * 1024,	/* 64MB */
66 };
67 static int lowmem_minfree_size = 4;
68 
69 static unsigned long lowmem_deathpending_timeout;
70 
/*
 * Print through pr_info() when @level does not exceed the debug_level
 * module parameter; otherwise the message is dropped.
 */
#define lowmem_print(level, ...)				\
	do {							\
		if ((level) <= lowmem_debug_level)		\
			pr_info(__VA_ARGS__);			\
	} while (0)
76 
77 
78 static DECLARE_WAIT_QUEUE_HEAD(event_wait);
79 static DEFINE_SPINLOCK(lmk_event_lock);
80 static struct circ_buf event_buffer;
81 #define MAX_BUFFERED_EVENTS 8
82 #define MAX_TASKNAME 128
83 
84 struct lmk_event {
85 	char taskname[MAX_TASKNAME];
86 	pid_t pid;
87 	uid_t uid;
88 	pid_t group_leader_pid;
89 	unsigned long min_flt;
90 	unsigned long maj_flt;
91 	unsigned long rss_in_pages;
92 	short oom_score_adj;
93 	short min_score_adj;
94 	unsigned long long start_time;
95 	struct list_head list;
96 };
97 
handle_lmk_event(struct task_struct * selected,int selected_tasksize,short min_score_adj)98 void handle_lmk_event(struct task_struct *selected, int selected_tasksize,
99 		      short min_score_adj)
100 {
101 	int head;
102 	int tail;
103 	struct lmk_event *events;
104 	struct lmk_event *event;
105 
106 	spin_lock(&lmk_event_lock);
107 
108 	head = event_buffer.head;
109 	tail = READ_ONCE(event_buffer.tail);
110 
111 	/* Do not continue to log if no space remains in the buffer. */
112 	if (CIRC_SPACE(head, tail, MAX_BUFFERED_EVENTS) < 1) {
113 		spin_unlock(&lmk_event_lock);
114 		return;
115 	}
116 
117 	events = (struct lmk_event *) event_buffer.buf;
118 	event = &events[head];
119 
120 	strncpy(event->taskname, selected->comm, MAX_TASKNAME);
121 
122 	event->pid = selected->pid;
123 	event->uid = from_kuid_munged(current_user_ns(), task_uid(selected));
124 	if (selected->group_leader)
125 		event->group_leader_pid = selected->group_leader->pid;
126 	else
127 		event->group_leader_pid = -1;
128 	event->min_flt = selected->min_flt;
129 	event->maj_flt = selected->maj_flt;
130 	event->oom_score_adj = selected->signal->oom_score_adj;
131 	event->start_time = nsec_to_clock_t(selected->real_start_time);
132 	event->rss_in_pages = selected_tasksize;
133 	event->min_score_adj = min_score_adj;
134 
135 	event_buffer.head = (head + 1) & (MAX_BUFFERED_EVENTS - 1);
136 
137 	spin_unlock(&lmk_event_lock);
138 
139 	wake_up_interruptible(&event_wait);
140 }
141 
lmk_event_show(struct seq_file * s,void * unused)142 static int lmk_event_show(struct seq_file *s, void *unused)
143 {
144 	struct lmk_event *events = (struct lmk_event *) event_buffer.buf;
145 	int head;
146 	int tail;
147 	struct lmk_event *event;
148 
149 	spin_lock(&lmk_event_lock);
150 
151 	head = event_buffer.head;
152 	tail = event_buffer.tail;
153 
154 	if (head == tail) {
155 		spin_unlock(&lmk_event_lock);
156 		return -EAGAIN;
157 	}
158 
159 	event = &events[tail];
160 
161 	seq_printf(s, "%lu %lu %lu %lu %lu %lu %hd %hd %llu\n%s\n",
162 		(unsigned long) event->pid, (unsigned long) event->uid,
163 		(unsigned long) event->group_leader_pid, event->min_flt,
164 		event->maj_flt, event->rss_in_pages, event->oom_score_adj,
165 		event->min_score_adj, event->start_time, event->taskname);
166 
167 	event_buffer.tail = (tail + 1) & (MAX_BUFFERED_EVENTS - 1);
168 
169 	spin_unlock(&lmk_event_lock);
170 	return 0;
171 }
172 
/*
 * poll callback for the event file: registers on event_wait and reports
 * POLLIN while at least one event is queued, 0 otherwise.
 */
static unsigned int lmk_event_poll(struct file *file, poll_table *wait)
{
	unsigned int mask;

	poll_wait(file, &event_wait, wait);

	spin_lock(&lmk_event_lock);
	mask = (event_buffer.head == event_buffer.tail) ? 0 : POLLIN;
	spin_unlock(&lmk_event_lock);

	return mask;
}
184 
lmk_event_open(struct inode * inode,struct file * file)185 static int lmk_event_open(struct inode *inode, struct file *file)
186 {
187 	return single_open(file, lmk_event_show, inode->i_private);
188 }
189 
190 static const struct file_operations event_file_ops = {
191 	.open = lmk_event_open,
192 	.poll = lmk_event_poll,
193 	.read = seq_read
194 };
195 
lmk_event_init(void)196 static void lmk_event_init(void)
197 {
198 	struct proc_dir_entry *entry;
199 
200 	event_buffer.head = 0;
201 	event_buffer.tail = 0;
202 	event_buffer.buf = kmalloc(
203 		sizeof(struct lmk_event) * MAX_BUFFERED_EVENTS, GFP_KERNEL);
204 	if (!event_buffer.buf)
205 		return;
206 	entry = proc_create("lowmemorykiller", 0, NULL, &event_file_ops);
207 	if (!entry)
208 		pr_err("error creating kernel lmk event file\n");
209 }
210 
lowmem_count(struct shrinker * s,struct shrink_control * sc)211 static unsigned long lowmem_count(struct shrinker *s,
212 				  struct shrink_control *sc)
213 {
214 	return global_page_state(NR_ACTIVE_ANON) +
215 		global_page_state(NR_ACTIVE_FILE) +
216 		global_page_state(NR_INACTIVE_ANON) +
217 		global_page_state(NR_INACTIVE_FILE);
218 }
219 
lowmem_scan(struct shrinker * s,struct shrink_control * sc)220 static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
221 {
222 	struct task_struct *tsk;
223 	struct task_struct *selected = NULL;
224 	unsigned long rem = 0;
225 	int tasksize;
226 	int i;
227 	short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
228 	int minfree = 0;
229 	int selected_tasksize = 0;
230 	short selected_oom_score_adj;
231 	int array_size = ARRAY_SIZE(lowmem_adj);
232 	int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
233 	int other_file = global_page_state(NR_FILE_PAGES) -
234 						global_page_state(NR_SHMEM) -
235 						global_page_state(NR_UNEVICTABLE) -
236 						total_swapcache_pages();
237 
238 	if (lowmem_adj_size < array_size)
239 		array_size = lowmem_adj_size;
240 	if (lowmem_minfree_size < array_size)
241 		array_size = lowmem_minfree_size;
242 	for (i = 0; i < array_size; i++) {
243 		minfree = lowmem_minfree[i];
244 		if (other_free < minfree && other_file < minfree) {
245 			min_score_adj = lowmem_adj[i];
246 			break;
247 		}
248 	}
249 
250 	lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
251 			sc->nr_to_scan, sc->gfp_mask, other_free,
252 			other_file, min_score_adj);
253 
254 	if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
255 		lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
256 			     sc->nr_to_scan, sc->gfp_mask);
257 		return 0;
258 	}
259 
260 	selected_oom_score_adj = min_score_adj;
261 
262 	rcu_read_lock();
263 	for_each_process(tsk) {
264 		struct task_struct *p;
265 		short oom_score_adj;
266 
267 		if (tsk->flags & PF_KTHREAD)
268 			continue;
269 
270 		p = find_lock_task_mm(tsk);
271 		if (!p)
272 			continue;
273 
274 		if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
275 		    time_before_eq(jiffies, lowmem_deathpending_timeout)) {
276 			task_unlock(p);
277 			rcu_read_unlock();
278 			return 0;
279 		}
280 		oom_score_adj = p->signal->oom_score_adj;
281 		if (oom_score_adj < min_score_adj) {
282 			task_unlock(p);
283 			continue;
284 		}
285 		tasksize = get_mm_rss(p->mm);
286 		task_unlock(p);
287 		if (tasksize <= 0)
288 			continue;
289 		if (selected) {
290 			if (oom_score_adj < selected_oom_score_adj)
291 				continue;
292 			if (oom_score_adj == selected_oom_score_adj &&
293 			    tasksize <= selected_tasksize)
294 				continue;
295 		}
296 		selected = p;
297 		selected_tasksize = tasksize;
298 		selected_oom_score_adj = oom_score_adj;
299 		lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n",
300 			     p->comm, p->pid, oom_score_adj, tasksize);
301 	}
302 	if (selected) {
303 		long cache_size = other_file * (long)(PAGE_SIZE / 1024);
304 		long cache_limit = minfree * (long)(PAGE_SIZE / 1024);
305 		long free = other_free * (long)(PAGE_SIZE / 1024);
306 
307 		task_lock(selected);
308 		send_sig(SIGKILL, selected, 0);
309 		/*
310 		 * FIXME: lowmemorykiller shouldn't abuse global OOM killer
311 		 * infrastructure. There is no real reason why the selected
312 		 * task should have access to the memory reserves.
313 		 */
314 		if (selected->mm)
315 			mark_oom_victim(selected);
316 		task_unlock(selected);
317 		trace_lowmemory_kill(selected, cache_size, cache_limit, free);
318 		lowmem_print(1, "Killing '%s' (%d) (tgid %d), adj %hd,\n" \
319 			        "   to free %ldkB on behalf of '%s' (%d) because\n" \
320 			        "   cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \
321 			        "   Free memory is %ldkB above reserved\n",
322 			     selected->comm, selected->pid, selected->tgid,
323 			     selected_oom_score_adj,
324 			     selected_tasksize * (long)(PAGE_SIZE / 1024),
325 			     current->comm, current->pid,
326 			     cache_size, cache_limit,
327 			     min_score_adj,
328 			     free);
329 		lowmem_deathpending_timeout = jiffies + HZ;
330 		rem += selected_tasksize;
331 		get_task_struct(selected);
332 	}
333 
334 	lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
335 		     sc->nr_to_scan, sc->gfp_mask, rem);
336 	rcu_read_unlock();
337 
338 	if (selected) {
339 		handle_lmk_event(selected, selected_tasksize, min_score_adj);
340 		put_task_struct(selected);
341 	}
342 	return rem;
343 }
344 
345 static struct shrinker lowmem_shrinker = {
346 	.scan_objects = lowmem_scan,
347 	.count_objects = lowmem_count,
348 	.seeks = DEFAULT_SEEKS * 16
349 };
350 
lowmem_init(void)351 static int __init lowmem_init(void)
352 {
353 	register_shrinker(&lowmem_shrinker);
354 	lmk_event_init();
355 	return 0;
356 }
357 device_initcall(lowmem_init);
358 
359 #ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
lowmem_oom_adj_to_oom_score_adj(short oom_adj)360 static short lowmem_oom_adj_to_oom_score_adj(short oom_adj)
361 {
362 	if (oom_adj == OOM_ADJUST_MAX)
363 		return OOM_SCORE_ADJ_MAX;
364 	else
365 		return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
366 }
367 
lowmem_autodetect_oom_adj_values(void)368 static void lowmem_autodetect_oom_adj_values(void)
369 {
370 	int i;
371 	short oom_adj;
372 	short oom_score_adj;
373 	int array_size = ARRAY_SIZE(lowmem_adj);
374 
375 	if (lowmem_adj_size < array_size)
376 		array_size = lowmem_adj_size;
377 
378 	if (array_size <= 0)
379 		return;
380 
381 	oom_adj = lowmem_adj[array_size - 1];
382 	if (oom_adj > OOM_ADJUST_MAX)
383 		return;
384 
385 	oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
386 	if (oom_score_adj <= OOM_ADJUST_MAX)
387 		return;
388 
389 	lowmem_print(1, "lowmem_shrink: convert oom_adj to oom_score_adj:\n");
390 	for (i = 0; i < array_size; i++) {
391 		oom_adj = lowmem_adj[i];
392 		oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
393 		lowmem_adj[i] = oom_score_adj;
394 		lowmem_print(1, "oom_adj %d => oom_score_adj %d\n",
395 			     oom_adj, oom_score_adj);
396 	}
397 }
398 
lowmem_adj_array_set(const char * val,const struct kernel_param * kp)399 static int lowmem_adj_array_set(const char *val, const struct kernel_param *kp)
400 {
401 	int ret;
402 
403 	ret = param_array_ops.set(val, kp);
404 
405 	/* HACK: Autodetect oom_adj values in lowmem_adj array */
406 	lowmem_autodetect_oom_adj_values();
407 
408 	return ret;
409 }
410 
lowmem_adj_array_get(char * buffer,const struct kernel_param * kp)411 static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp)
412 {
413 	return param_array_ops.get(buffer, kp);
414 }
415 
lowmem_adj_array_free(void * arg)416 static void lowmem_adj_array_free(void *arg)
417 {
418 	param_array_ops.free(arg);
419 }
420 
421 static struct kernel_param_ops lowmem_adj_array_ops = {
422 	.set = lowmem_adj_array_set,
423 	.get = lowmem_adj_array_get,
424 	.free = lowmem_adj_array_free,
425 };
426 
427 static const struct kparam_array __param_arr_adj = {
428 	.max = ARRAY_SIZE(lowmem_adj),
429 	.num = &lowmem_adj_size,
430 	.ops = &param_ops_short,
431 	.elemsize = sizeof(lowmem_adj[0]),
432 	.elem = lowmem_adj,
433 };
434 #endif
435 
436 /*
437  * not really modular, but the easiest way to keep compat with existing
438  * bootargs behaviour is to continue using module_param here.
439  */
440 module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
441 #ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
442 module_param_cb(adj, &lowmem_adj_array_ops,
443 		.arr = &__param_arr_adj,
444 		S_IRUGO | S_IWUSR);
445 __MODULE_PARM_TYPE(adj, "array of short");
446 #else
447 module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size,
448 			 S_IRUGO | S_IWUSR);
449 #endif
450 module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
451 			 S_IRUGO | S_IWUSR);
452 module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);
453 
454